-
Notifications
You must be signed in to change notification settings - Fork 3
/
spider.go
67 lines (59 loc) · 1.83 KB
/
spider.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"net/http"
"net/url"
"time"
)
// getSpiderHeaders returns a freshly allocated set of request headers that
// mimic a desktop Chrome browser performing a top-level navigation. Each call
// yields an independent http.Header, so callers may modify the result freely.
func getSpiderHeaders() http.Header {
	// Name/value pairs applied in order; Set canonicalizes each header name.
	pairs := [][2]string{
		{"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"},
		{"Accept-Language", "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7"},
		{"Cache-Control", "max-age=0"},
		{"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"},
		{"Sec-Fetch-Dest", "document"},
		{"Sec-Fetch-Mode", "navigate"},
		{"Sec-Fetch-Site", "none"},
		{"Sec-Fetch-User", "?1"},
		{"Upgrade-Insecure-Requests", "1"},
	}

	h := make(http.Header, len(pairs))
	for _, kv := range pairs {
		h.Set(kv[0], kv[1])
	}
	return h
}
// spiderClient is the shared HTTP client used for scraping requests. It picks
// up proxy settings from the environment, allows 10 seconds for the TLS
// handshake, and caps each whole request at 30 seconds.
var spiderClient = &http.Client{
	Timeout: 30 * time.Second,
	Transport: &http.Transport{
		Proxy:               http.ProxyFromEnvironment,
		TLSHandshakeTimeout: 10 * time.Second,
	},
}
// FetchVersions downloads the page at link and collects the BDS package links
// it advertises, returning a map from platform name to package URL.
//
// Every element matching the ".downloadlink" selector contributes one entry:
// the key is its "data-platform" attribute and the value is its parsed "href"
// attribute. Elements that lack either attribute, carry an empty platform
// name, or whose href cannot be parsed are skipped, so the returned map never
// contains a nil *url.URL.
//
// An error is returned when the request cannot be built or sent, when the
// server answers with a status other than 200 OK, or when the response body
// cannot be parsed as HTML.
func FetchVersions(link string) (map[string]*url.URL, error) {
	request, err := http.NewRequest(http.MethodGet, link, nil)
	if err != nil {
		return nil, fmt.Errorf("building version request: %w", err)
	}
	request.Header = getSpiderHeaders()

	res, err := spiderClient.Do(request)
	if err != nil {
		return nil, fmt.Errorf("fetching version page: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to fetch version with given url, code=%d", res.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return nil, fmt.Errorf("parsing version page: %w", err)
	}

	result := make(map[string]*url.URL)
	doc.Find(".downloadlink").Each(func(_ int, s *goquery.Selection) {
		plat, ok := s.Attr("data-platform")
		if !ok || plat == "" {
			// Not a platform download link; nothing to key the entry by.
			return
		}
		href, ok := s.Attr("href")
		if !ok {
			return
		}
		target, err := url.Parse(href)
		if err != nil {
			// Best effort: drop a malformed link instead of storing a nil
			// URL that callers would dereference.
			return
		}
		result[plat] = target
	})
	return result, nil
}