-
Notifications
You must be signed in to change notification settings - Fork 48
/
extract_test.go
106 lines (93 loc) · 4.01 KB
/
extract_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package html2article
import (
	"bytes"
	"net/http"
	"testing"

	"github.com/stretchr/testify/assert"
)
// TestToArticle downloads live article pages and verifies, for each one, that
// the extractor selects a content node carrying the expected class attribute
// and reports a publish time inside a fixed window.
//
// The test performs real HTTP requests, so it needs network access and depends
// on the remote pages still being available.
func TestToArticle(t *testing.T) {
	type articleCase struct {
		Url         string
		ExpectClass string
	}

	// Accepted window for article.Publishtime (inclusive on both ends).
	// NOTE(review): the values look like Unix timestamps in seconds — confirm
	// against the extractor.
	const (
		minPublishtime = 1405732300
		maxPublishtime = 1555732300
	)

	// checkCases runs every case against the live site. A failing case is
	// reported and the loop moves on, so one dead URL does not hide the rest.
	checkCases := func(t *testing.T, cases []articleCase) {
		assert := assert.New(t)
		for _, tc := range cases {
			// The constructor error must be checked: calling ToArticle on the
			// result of a failed NewFromUrl is not safe.
			ext, err := NewFromUrl(tc.Url)
			if err != nil {
				t.Errorf("NewFromUrl %s error: %v", tc.Url, err)
				continue
			}
			article, err := ext.ToArticle()
			if err != nil {
				t.Errorf("ToArticle %s error: %v", tc.Url, err)
				continue
			}
			// testify expects (expected, actual).
			assert.Equal(tc.ExpectClass, attr(article.contentNode, "class"), "ToArticle %s content node class", tc.Url)
			if article.Publishtime < minPublishtime || article.Publishtime > maxPublishtime {
				t.Errorf("ToArticle %s error,got %v", tc.Url, article.Publishtime)
			}
		}
	}

	t.Run("test ToArticle leiphone", func(t *testing.T) {
		checkCases(t, []articleCase{
			// Leiphone
			{"https://www.leiphone.com/news/201602/DsiQtR6c1jCu7iwA.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201708/JQRI6UI8uavpzMwF.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201708/wlY7YUnEmYHwFFfN.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201708/DMdo0sSckwJ5FGEv.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201703/5iXkHxC5eR9VuHpv.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201708/pIV08b9HKahnoYIM.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201708/Gs4XTJ1dDPRe328z.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201707/RRiH46oUsrJSybq0.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201708/UixD9DKRXaUTts1d.html", "lph-article-comView"},
			{"https://www.leiphone.com/news/201703/OVX5oq3f5jR81wnr.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201701/Tb4KueUFvTWNUPRb.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201701/gFgzIMAQCaw82kkw.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201701/rxHljWvlNxOzPKI5.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201701/03pLjLLM8nbUgmMA.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201701/JFYc1GmvsR3Taeqq.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201703/Od6mC55tGNF0HtYZ.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201608/om47X9OuSsTapSgp.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201704/3tl33V96ZY8fbyGW.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201702/ayGjmykd2NPtU0on.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201703/Fk7yvXGixq3ioNwn.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201703/FsCPnwVXvuF8ntVA.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201704/4uJXa3clD8X7Ahbo.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201610/Bo67kHXGUcXbDFAL.html", "lph-article-comView"},
			{"http://www.leiphone.com/news/201702/XwhHugKHTk86WQso.html", "lph-article-comView"},
		})
	})

	t.Run("test ToArticle others", func(t *testing.T) {
		checkCases(t, []articleCase{
			{"http://cj.sina.com.cn/article/detail/5835524730/241716?column=hkstock&ch=9", "article article_16"},
			{"http://cj.sina.com.cn/article/detail/5617263472/355836?column=stock&ch=9", "article article_16"},
		})
	})
}
// BenchmarkToArticle measures NewFromReader followed by ToArticle on one
// leiphone article page.
//
// The page is downloaded once and buffered in memory before the timer starts;
// each iteration then parses a fresh reader over the same bytes. Reading
// resp.Body directly inside the loop would only give the first iteration any
// data, because an HTTP body can be consumed just once.
//
// The benchmark is skipped (not silently passed) when the page cannot be
// fetched, and fails if the buffered page cannot be parsed.
func BenchmarkToArticle(b *testing.B) {
	urlStr := "https://www.leiphone.com/news/201602/DsiQtR6c1jCu7iwA.html"
	resp, err := http.Get(urlStr)
	if err != nil {
		b.Skipf("fetching %s: %v", urlStr, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		b.Skipf("fetching %s: unexpected status %s", urlStr, resp.Status)
	}

	var page bytes.Buffer
	if _, err := page.ReadFrom(resp.Body); err != nil {
		b.Fatalf("reading %s: %v", urlStr, err)
	}
	html := page.Bytes()

	// Exclude the download from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		ext, err := NewFromReader(bytes.NewReader(html))
		if err != nil {
			b.Fatal(err.Error())
		}
		if _, err := ext.ToArticle(); err != nil {
			b.Fatal(err.Error())
		}
	}
}