/* Package xmlquery provides extract data from XML documents using XPath expression. */ package xmlquery import ( "fmt" "strings" "github.com/antchfx/xpath" ) // SelectElements finds child elements with the specified name. func (n *Node) SelectElements(name string) []*Node { return Find(n, name) } // SelectElement finds child elements with the specified name. func (n *Node) SelectElement(name string) *Node { return FindOne(n, name) } // SelectAttr returns the attribute value with the specified name. func (n *Node) SelectAttr(name string) string { if n.Type == AttributeNode { if n.Data == name { return n.InnerText() } return "" } var local, space string local = name if i := strings.Index(name, ":"); i > 0 { space = name[:i] local = name[i+1:] } for _, attr := range n.Attr { if attr.Name.Local == local && attr.Name.Space == space { return attr.Value } } return "" } var _ xpath.NodeNavigator = &NodeNavigator{} // CreateXPathNavigator creates a new xpath.NodeNavigator for the specified // XML Node. func CreateXPathNavigator(top *Node) *NodeNavigator { return &NodeNavigator{curr: top, root: top, attr: -1} } func getCurrentNode(it *xpath.NodeIterator) *Node { n := it.Current().(*NodeNavigator) if n.NodeType() == xpath.AttributeNode { childNode := &Node{ Type: TextNode, Data: n.Value(), } return &Node{ Parent: n.curr, Type: AttributeNode, Data: n.LocalName(), FirstChild: childNode, LastChild: childNode, } } return n.curr } // Find is like QueryAll but panics if `expr` is not a valid XPath expression. // See `QueryAll()` function. func Find(top *Node, expr string) []*Node { nodes, err := QueryAll(top, expr) if err != nil { panic(err) } return nodes } // FindOne is like Query but panics if `expr` is not a valid XPath expression. // See `Query()` function. func FindOne(top *Node, expr string) *Node { node, err := Query(top, expr) if err != nil { panic(err) } return node } // QueryAll searches the XML Node that matches by the specified XPath expr. // Returns an error if the expression `expr` cannot be parsed. func QueryAll(top *Node, expr string) ([]*Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelectorAll(top, exp), nil } // Query searches the XML Node that matches by the specified XPath expr, // and returns first matched element. func Query(top *Node, expr string) (*Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelector(top, exp), nil } // QuerySelectorAll searches all of the XML Node that matches the specified // XPath selectors. func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node { t := selector.Select(CreateXPathNavigator(top)) var elems []*Node for t.MoveNext() { elems = append(elems, getCurrentNode(t)) } return elems } // QuerySelector returns the first matched XML Node by the specified XPath // selector. func QuerySelector(top *Node, selector *xpath.Expr) *Node { t := selector.Select(CreateXPathNavigator(top)) if t.MoveNext() { return getCurrentNode(t) } return nil } // FindEach searches the html.Node and calls functions cb. // Important: this method is deprecated, instead, use for .. = range Find(){}. func FindEach(top *Node, expr string, cb func(int, *Node)) { for i, n := range Find(top, expr) { cb(i, n) } } // FindEachWithBreak functions the same as FindEach but allows to break the loop // by returning false from the callback function `cb`. // Important: this method is deprecated, instead, use .. = range Find(){}. func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) { for i, n := range Find(top, expr) { if !cb(i, n) { break } } } type NodeNavigator struct { root, curr *Node attr int } func (x *NodeNavigator) Current() *Node { return x.curr } func (x *NodeNavigator) NodeType() xpath.NodeType { switch x.curr.Type { case CommentNode: return xpath.CommentNode case TextNode, CharDataNode: return xpath.TextNode case DeclarationNode, DocumentNode: return xpath.RootNode case ElementNode: if x.attr != -1 { return xpath.AttributeNode } return xpath.ElementNode } panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type)) } func (x *NodeNavigator) LocalName() string { if x.attr != -1 { return x.curr.Attr[x.attr].Name.Local } return x.curr.Data } func (x *NodeNavigator) Prefix() string { if x.NodeType() == xpath.AttributeNode { if x.attr != -1 { return x.curr.Attr[x.attr].Name.Space } return "" } return x.curr.Prefix } func (x *NodeNavigator) NamespaceURL() string { if x.attr != -1 { return x.curr.Attr[x.attr].NamespaceURI } return x.curr.NamespaceURI } func (x *NodeNavigator) Value() string { switch x.curr.Type { case CommentNode: return x.curr.Data case ElementNode: if x.attr != -1 { return x.curr.Attr[x.attr].Value } return x.curr.InnerText() case TextNode: return x.curr.Data } return "" } func (x *NodeNavigator) Copy() xpath.NodeNavigator { n := *x return &n } func (x *NodeNavigator) MoveToRoot() { x.curr = x.root } func (x *NodeNavigator) MoveToParent() bool { if x.attr != -1 { x.attr = -1 return true } else if node := x.curr.Parent; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToNextAttribute() bool { if x.attr >= len(x.curr.Attr)-1 { return false } x.attr++ return true } func (x *NodeNavigator) MoveToChild() bool { if x.attr != -1 { return false } if node := x.curr.FirstChild; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToFirst() bool { if x.attr != -1 || x.curr.PrevSibling == nil { return false } for { node := x.curr.PrevSibling if node == nil { break } x.curr = node } return true } func (x *NodeNavigator) String() string { return x.Value() } func (x *NodeNavigator) MoveToNext() bool { if x.attr != -1 { return false } for node := x.curr.NextSibling; node != nil; node = x.curr.NextSibling { x.curr = node if x.curr.Type != TextNode { return true } } return false } func (x *NodeNavigator) MoveToPrevious() bool { if x.attr != -1 { return false } for node := x.curr.PrevSibling; node != nil; node = x.curr.PrevSibling { x.curr = node if x.curr.Type != TextNode { return true } } return false } func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { node, ok := other.(*NodeNavigator) if !ok || node.root != x.root { return false } x.curr = node.curr x.attr = node.attr return true }