tweego/html.go
2020-02-24 14:28:13 -06:00

98 lines
1.9 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Copyright © 20142020 Thomas Michael Edwards. All rights reserved.
Use of this source code is governed by a Simplified BSD License which
can be found in the LICENSE file.
*/
package main
import (
// standard packages
"bytes"
"regexp"
// external packages
"golang.org/x/net/html"
)
func getDocumentTree(source []byte) (*html.Node, error) {
return html.Parse(bytes.NewReader(source))
}
func getElementByID(node *html.Node, idPat string) *html.Node {
return getElementByIDAndTag(node, idPat, "")
}
func getElementByTag(node *html.Node, tag string) *html.Node {
return getElementByIDAndTag(node, "", tag)
}
func getElementByIDAndTag(node *html.Node, idPat, tag string) *html.Node {
if node == nil {
return nil
}
var (
tagOK = false
idOK = false
)
if node.Type == html.ElementNode {
if tag == "" || tag == node.Data {
tagOK = true
}
if idPat == "" {
idOK = true
} else if len(node.Attr) > 0 {
re := regexp.MustCompile(idPat)
for _, attr := range node.Attr {
if attr.Key == "id" && re.MatchString(attr.Val) {
idOK = true
}
}
}
if tagOK && idOK {
return node
}
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
if result := getElementByIDAndTag(child, idPat, tag); result != nil {
return result
}
}
return nil
}
func hasAttr(node *html.Node, attrName string) bool {
for _, attr := range node.Attr {
if attrName == attr.Key {
return true
}
}
return false
}
func hasAttrRe(node *html.Node, attrRe *regexp.Regexp) bool {
for _, attr := range node.Attr {
if attrRe.MatchString(attr.Key) {
return true
}
}
return false
}
func getAttr(node *html.Node, attrName string) *html.Attribute {
for _, attr := range node.Attr {
if attrName == attr.Key {
return &attr
}
}
return nil
}
func getAttrRe(node *html.Node, attrRe *regexp.Regexp) *html.Attribute {
for _, attr := range node.Attr {
if attrRe.MatchString(attr.Key) {
return &attr
}
}
return nil
}