-
Notifications
You must be signed in to change notification settings - Fork 2
/
url_parser_test.go
134 lines (117 loc) · 3.99 KB
/
url_parser_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
package unfurlist
import (
"fmt"
"testing"
)
// ExampleParseURLs runs ParseURLs over a text that mixes URLs with
// surrounding punctuation, angle brackets, markdown link syntax, parentheses,
// non-ASCII characters and upper/mixed-case schemes, then prints every
// returned URL on its own line.
func ExampleParseURLs() {
	sample := `This text contains various urls mixed with different reserved per rfc3986 characters:
http://google.com, https://doist.com/#about (also see https://todoist.com), <http://example.com/foo>,
**[markdown](http://daringfireball.net/projects/markdown/)**,
http://marvel-movies.wikia.com/wiki/The_Avengers_(film), https://pt.wikipedia.org/wiki/Mamão.
https://docs.live.net/foo/?section-id={D7CEDACE-AEFB-4B61-9C63-BDE05EEBD80A},
http://example.com/?param=foo;bar
HTTPS://EXAMPLE.COM/UPPERCASE
hTtP://example.com/mixedCase
`
	urls := ParseURLs(sample)
	for i := range urls {
		fmt.Println(urls[i])
	}
	// Output:
	// http://google.com
	// https://doist.com/#about
	// https://todoist.com
	// http://example.com/foo
	// http://daringfireball.net/projects/markdown/
	// http://marvel-movies.wikia.com/wiki/The_Avengers_(film)
	// https://pt.wikipedia.org/wiki/Mamão
	// https://docs.live.net/foo/?section-id={D7CEDACE-AEFB-4B61-9C63-BDE05EEBD80A}
	// http://example.com/?param=foo;bar
	// HTTPS://EXAMPLE.COM/UPPERCASE
	// hTtP://example.com/mixedCase
}
// TestParseURLs__unique verifies that a URL occurring more than once in the
// input is reported a single time: http://google.com appears twice in the
// text but only once in the expected result, ahead of http://twitter.com.
func TestParseURLs__unique(t *testing.T) {
	const input = "Only two unique urls should be extracted from this text: http://google.com, http://twitter.com, http://google.com"
	want := []string{"http://google.com", "http://twitter.com"}
	got := ParseURLs(input)

	// Compare lengths first so the element-wise loop below can index both
	// slices safely.
	if len(got) != len(want) {
		t.Fatalf("want %v, got %v", want, got)
	}
	for i := range want {
		if got[i] == want[i] {
			continue
		}
		t.Fatalf("want %v, got %v", want, got)
	}
}
// TestBasicURLs checks that two whitespace-separated URLs, one with a
// fragment and one with a path, are returned unchanged and in input order.
func TestBasicURLs(t *testing.T) {
	want := []string{"http://doist.com/#about", "https://todoist.com/chrome"}
	got := ParseURLs("Testing this out http://doist.com/#about https://todoist.com/chrome")

	// A length mismatch is reported on its own; element comparison is only
	// attempted when both slices have the same size.
	if len(got) != len(want) {
		t.Errorf("Length not the same got: %d != want: %d", len(got), len(want))
		return
	}
	for i, expected := range want {
		if got[i] != expected {
			t.Errorf("%q != %s", got, want)
		}
	}
}
// TestBugURL checks that a URL whose path contains percent-encoded spaces
// and several dots is returned whole and unmodified.
func TestBugURL(t *testing.T) {
	want := []string{"http://f.cl.ly/items/000V0N1B31283s3O350q/Screen%20Shot%202015-12-22%20at%2014.49.28.png"}
	got := ParseURLs("Testing this out Bug report http://f.cl.ly/items/000V0N1B31283s3O350q/Screen%20Shot%202015-12-22%20at%2014.49.28.png")

	// A length mismatch is reported on its own; element comparison is only
	// attempted when both slices have the same size.
	if len(got) != len(want) {
		t.Errorf("Length not the same got: %d != want: %d", len(got), len(want))
		return
	}
	for i, expected := range want {
		if got[i] != expected {
			t.Errorf("%q != %s", got, want)
		}
	}
}
// TestValidURL is a table-driven test of validURL. The table expects http(s)
// URLs with a host to be accepted, and expects rejection of an unescaped
// space in the query, a non-http scheme, the empty string, and a URL with no
// host. The test stops at the first mismatch.
func TestValidURL(t *testing.T) {
	type check struct {
		rawURL string
		valid  bool
	}
	checks := []check{
		{"https://example.com/path?multi+word+escaped+query", true},
		{"https://example.com/path?unescaped query", false},
		{"ftp://example.com/unsupported/scheme", false},
		{"", false},
		{"https://example.com/path", true},
		{"https:///path", false},
	}
	for i := range checks {
		c := checks[i]
		if got := validURL(c.rawURL); got != c.valid {
			t.Fatalf("validURL(%q)==%t, want %t", c.rawURL, got, c.valid)
		}
	}
}
// TestParseMarkdownURLs checks which URLs parseMarkdownURLs reports for a
// Markdown document: the implicit link (1), the explicit link (2) and the
// implicit link in the last paragraph (5) are expected, while the URL inside
// an inline code span (3) and the one inside a code block (4) are expected to
// be skipped.
func TestParseMarkdownURLs(t *testing.T) {
	// The preformatted line must be separated from the surrounding
	// paragraphs by blank lines and indented by four spaces; otherwise
	// Markdown treats it as ordinary paragraph text rather than a code block,
	// and URL 4 would no longer be a "code block" URL as the fixture claims.
	text := `Implicit url: http://example.com/1, [explicit url](http://example.com/2).
This url should be skipped ` + "`http://example.com/3`" + `, as well as the one inside code block:

    preformatted text block with url: http://example.com/4

Another paragraph with implicit link http://example.com/5.
`
	// NOTE(review): the second argument (10) is presumably an upper bound on
	// the number of URLs returned; confirm against parseMarkdownURLs.
	got := parseMarkdownURLs(text, 10)
	want := []string{"http://example.com/1", "http://example.com/2", "http://example.com/5"}
	if len(got) != len(want) {
		t.Fatalf("want: %v, got: %v", want, got)
	}
	for i := range got {
		if got[i] != want[i] {
			t.Fatalf("%d: want %q, got %q", i, want[i], got[i])
		}
	}
}
// escape is a package-level sink for the benchmark result. Storing the
// result here presumably keeps the compiler from discarding the benchmarked
// call as dead code.
var escape []string

// BenchmarkMarkdownURLs measures parseMarkdownURLs on a short Markdown
// fixture, reporting allocations and throughput relative to the fixture's
// length in bytes.
func BenchmarkMarkdownURLs(b *testing.B) {
	input := `Implicit url: http://example.com/1, [explicit url](http://example.com/2).
This url should be skipped ` + "`http://example.com/3`" + `, as well as the one inside code block:
preformatted text block with url: http://example.com/4
Another paragraph with implicit link http://example.com/5.
`
	b.SetBytes(int64(len(input)))
	b.ReportAllocs()
	for n := 0; n < b.N; n++ {
		escape = parseMarkdownURLs(input, 10)
	}
}