diff --git a/attr.go b/attr.go
index cb2a599..5ea7b0b 100644
--- a/attr.go
+++ b/attr.go
@@ -1,6 +1,7 @@
 package dom
 
 import (
+	"bytes"
 	"strings"
 
 	"golang.org/x/net/html"
@@ -48,3 +49,30 @@ func HasClass(node *html.Node, expectedClass string) bool {
 	}
 	return false
 }
+
+// - - - - //
+
+// collectText appends the Data of every text node in the subtree rooted at n
+// to buf, walking the tree depth-first (a node before its children, children
+// in sibling order). n must not be nil.
+func collectText(n *html.Node, buf *bytes.Buffer) {
+	if n.Type == html.TextNode {
+		buf.WriteString(n.Data)
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		collectText(c, buf)
+	}
+}
+
+// CollectText returns the concatenated text of node and all of its
+// descendants. A nil node yields the empty string rather than a nil-pointer
+// panic.
+func CollectText(node *html.Node) string {
+	if node == nil {
+		return ""
+	}
+
+	var buf bytes.Buffer
+	collectText(node, &buf)
+	return buf.String()
+}
diff --git a/attr_test.go b/attr_test.go
index b10993f..3af7d86 100644
--- a/attr_test.go
+++ b/attr_test.go
@@ -2,6 +2,7 @@ package dom
 
 import (
 	"reflect"
+	"strings"
 	"testing"
 
 	"golang.org/x/net/html"
@@ -144,3 +145,25 @@ func TestHasClass(t *testing.T) {
 		t.Error("expected different output")
 	}
 }
+
+func TestCollectText(t *testing.T) {
+	input := `
+

Hello world

+

Some description

+	`
+
+	doc, err := html.Parse(strings.NewReader(input))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	heading := FindFirstNode(doc, func(node *html.Node) bool {
+		return HasClass(node, "article__title")
+	})
+
+	expected := "Hello world"
+	output := CollectText(heading)
+	if output != expected {
+		t.Errorf("expected %q but got %q", expected, output)
+	}
+}
diff --git a/examples/selectors/main.go b/examples/selectors/main.go
new file mode 100644
index 0000000..7992751
--- /dev/null
+++ b/examples/selectors/main.go
@@ -0,0 +1,45 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"strings"
+
+	"github.com/JohannesKaufmann/dom"
+	"golang.org/x/net/html"
+)
+
+func main() {
+	input := `
+

Github

+ +

+

JohannesKaufmann/dom

+ + +

+	`
+
+	doc, err := html.Parse(strings.NewReader(input))
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// - - - //
+
+	headingNodes := dom.FindAllNodes(doc, func(node *html.Node) bool {
+		name := dom.NodeName(node)
+		return dom.NameIsHeading(name)
+	})
+
+	nameNode := dom.FindFirstNode(doc, func(node *html.Node) bool {
+		return dom.HasClass(node, "repo__name")
+	})
+	repoName := dom.CollectText(nameNode)
+
+	fmt.Printf("count:%d name:%q\n", len(headingNodes), repoName)
+	// count:4 name:"JohannesKaufmann/dom"
+}