peridot/vendor/github.com/antchfx/xpath/query.go
2022-07-07 22:13:21 +02:00

923 lines
18 KiB
Go

package xpath
import (
"bytes"
"fmt"
"hash/fnv"
"reflect"
)
type iterator interface {
Current() NodeNavigator
}
// An XPath query interface.
type query interface {
// Select traversing iterator returns a query matched node NodeNavigator.
Select(iterator) NodeNavigator
// Evaluate evaluates query and returns values of the current query.
Evaluate(iterator) interface{}
Clone() query
}
// nopQuery is an empty query that always return nil for any query.
type nopQuery struct {
query
}
func (nopQuery) Select(iterator) NodeNavigator { return nil }
func (nopQuery) Evaluate(iterator) interface{} { return nil }
func (nopQuery) Clone() query { return nopQuery{} }
// contextQuery is returns current node on the iterator object query.
type contextQuery struct {
count int
Root bool // Moving to root-level node in the current context iterator.
}
func (c *contextQuery) Select(t iterator) (n NodeNavigator) {
if c.count == 0 {
c.count++
n = t.Current().Copy()
if c.Root {
n.MoveToRoot()
}
}
return n
}
func (c *contextQuery) Evaluate(iterator) interface{} {
c.count = 0
return c
}
func (c *contextQuery) Clone() query {
return &contextQuery{count: 0, Root: c.Root}
}
// ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*)
type ancestorQuery struct {
iterator func() NodeNavigator
Self bool
Input query
Predicate func(NodeNavigator) bool
}
func (a *ancestorQuery) Select(t iterator) NodeNavigator {
for {
if a.iterator == nil {
node := a.Input.Select(t)
if node == nil {
return nil
}
first := true
node = node.Copy()
a.iterator = func() NodeNavigator {
if first && a.Self {
first = false
if a.Predicate(node) {
return node
}
}
for node.MoveToParent() {
if !a.Predicate(node) {
continue
}
return node
}
return nil
}
}
if node := a.iterator(); node != nil {
return node
}
a.iterator = nil
}
}
func (a *ancestorQuery) Evaluate(t iterator) interface{} {
a.Input.Evaluate(t)
a.iterator = nil
return a
}
func (a *ancestorQuery) Test(n NodeNavigator) bool {
return a.Predicate(n)
}
func (a *ancestorQuery) Clone() query {
return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate}
}
// attributeQuery is an XPath attribute node query.(@*)
type attributeQuery struct {
iterator func() NodeNavigator
Input query
Predicate func(NodeNavigator) bool
}
func (a *attributeQuery) Select(t iterator) NodeNavigator {
for {
if a.iterator == nil {
node := a.Input.Select(t)
if node == nil {
return nil
}
node = node.Copy()
a.iterator = func() NodeNavigator {
for {
onAttr := node.MoveToNextAttribute()
if !onAttr {
return nil
}
if a.Predicate(node) {
return node
}
}
}
}
if node := a.iterator(); node != nil {
return node
}
a.iterator = nil
}
}
func (a *attributeQuery) Evaluate(t iterator) interface{} {
a.Input.Evaluate(t)
a.iterator = nil
return a
}
func (a *attributeQuery) Test(n NodeNavigator) bool {
return a.Predicate(n)
}
func (a *attributeQuery) Clone() query {
return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate}
}
// childQuery is an XPath child node query.(child::*)
type childQuery struct {
posit int
iterator func() NodeNavigator
Input query
Predicate func(NodeNavigator) bool
}
func (c *childQuery) Select(t iterator) NodeNavigator {
for {
if c.iterator == nil {
c.posit = 0
node := c.Input.Select(t)
if node == nil {
return nil
}
node = node.Copy()
first := true
c.iterator = func() NodeNavigator {
for {
if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) {
return nil
}
first = false
if c.Predicate(node) {
return node
}
}
}
}
if node := c.iterator(); node != nil {
c.posit++
return node
}
c.iterator = nil
}
}
func (c *childQuery) Evaluate(t iterator) interface{} {
c.Input.Evaluate(t)
c.iterator = nil
return c
}
func (c *childQuery) Test(n NodeNavigator) bool {
return c.Predicate(n)
}
func (c *childQuery) Clone() query {
return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate}
}
// position returns a position of current NodeNavigator.
func (c *childQuery) position() int {
return c.posit
}
// descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*)
type descendantQuery struct {
iterator func() NodeNavigator
posit int
level int
Self bool
Input query
Predicate func(NodeNavigator) bool
}
func (d *descendantQuery) Select(t iterator) NodeNavigator {
for {
if d.iterator == nil {
d.posit = 0
node := d.Input.Select(t)
if node == nil {
return nil
}
node = node.Copy()
d.level = 0
positmap := make(map[int]int)
first := true
d.iterator = func() NodeNavigator {
if first && d.Self {
first = false
if d.Predicate(node) {
d.posit = 1
positmap[d.level] = 1
return node
}
}
for {
if node.MoveToChild() {
d.level = d.level + 1
positmap[d.level] = 0
} else {
for {
if d.level == 0 {
return nil
}
if node.MoveToNext() {
break
}
node.MoveToParent()
d.level = d.level - 1
}
}
if d.Predicate(node) {
positmap[d.level]++
d.posit = positmap[d.level]
return node
}
}
}
}
if node := d.iterator(); node != nil {
return node
}
d.iterator = nil
}
}
func (d *descendantQuery) Evaluate(t iterator) interface{} {
d.Input.Evaluate(t)
d.iterator = nil
return d
}
func (d *descendantQuery) Test(n NodeNavigator) bool {
return d.Predicate(n)
}
// position returns a position of current NodeNavigator.
func (d *descendantQuery) position() int {
return d.posit
}
func (d *descendantQuery) depth() int {
return d.level
}
func (d *descendantQuery) Clone() query {
return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate}
}
// followingQuery is an XPath following node query.(following::*|following-sibling::*)
type followingQuery struct {
posit int
iterator func() NodeNavigator
Input query
Sibling bool // The matching sibling node of current node.
Predicate func(NodeNavigator) bool
}
func (f *followingQuery) Select(t iterator) NodeNavigator {
for {
if f.iterator == nil {
f.posit = 0
node := f.Input.Select(t)
if node == nil {
return nil
}
node = node.Copy()
if f.Sibling {
f.iterator = func() NodeNavigator {
for {
if !node.MoveToNext() {
return nil
}
if f.Predicate(node) {
f.posit++
return node
}
}
}
} else {
var q *descendantQuery // descendant query
f.iterator = func() NodeNavigator {
for {
if q == nil {
for !node.MoveToNext() {
if !node.MoveToParent() {
return nil
}
}
q = &descendantQuery{
Self: true,
Input: &contextQuery{},
Predicate: f.Predicate,
}
t.Current().MoveTo(node)
}
if node := q.Select(t); node != nil {
f.posit = q.posit
return node
}
q = nil
}
}
}
}
if node := f.iterator(); node != nil {
return node
}
f.iterator = nil
}
}
func (f *followingQuery) Evaluate(t iterator) interface{} {
f.Input.Evaluate(t)
return f
}
func (f *followingQuery) Test(n NodeNavigator) bool {
return f.Predicate(n)
}
func (f *followingQuery) Clone() query {
return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate}
}
func (f *followingQuery) position() int {
return f.posit
}
// precedingQuery is an XPath preceding node query.(preceding::*)
type precedingQuery struct {
iterator func() NodeNavigator
posit int
Input query
Sibling bool // The matching sibling node of current node.
Predicate func(NodeNavigator) bool
}
func (p *precedingQuery) Select(t iterator) NodeNavigator {
for {
if p.iterator == nil {
p.posit = 0
node := p.Input.Select(t)
if node == nil {
return nil
}
node = node.Copy()
if p.Sibling {
p.iterator = func() NodeNavigator {
for {
for !node.MoveToPrevious() {
return nil
}
if p.Predicate(node) {
p.posit++
return node
}
}
}
} else {
var q query
p.iterator = func() NodeNavigator {
for {
if q == nil {
for !node.MoveToPrevious() {
if !node.MoveToParent() {
return nil
}
p.posit = 0
}
q = &descendantQuery{
Self: true,
Input: &contextQuery{},
Predicate: p.Predicate,
}
t.Current().MoveTo(node)
}
if node := q.Select(t); node != nil {
p.posit++
return node
}
q = nil
}
}
}
}
if node := p.iterator(); node != nil {
return node
}
p.iterator = nil
}
}
func (p *precedingQuery) Evaluate(t iterator) interface{} {
p.Input.Evaluate(t)
return p
}
func (p *precedingQuery) Test(n NodeNavigator) bool {
return p.Predicate(n)
}
func (p *precedingQuery) Clone() query {
return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate}
}
func (p *precedingQuery) position() int {
return p.posit
}
// parentQuery is an XPath parent node query.(parent::*)
type parentQuery struct {
Input query
Predicate func(NodeNavigator) bool
}
func (p *parentQuery) Select(t iterator) NodeNavigator {
for {
node := p.Input.Select(t)
if node == nil {
return nil
}
node = node.Copy()
if node.MoveToParent() && p.Predicate(node) {
return node
}
}
}
func (p *parentQuery) Evaluate(t iterator) interface{} {
p.Input.Evaluate(t)
return p
}
func (p *parentQuery) Clone() query {
return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate}
}
func (p *parentQuery) Test(n NodeNavigator) bool {
return p.Predicate(n)
}
// selfQuery is an Self node query.(self::*)
type selfQuery struct {
Input query
Predicate func(NodeNavigator) bool
}
func (s *selfQuery) Select(t iterator) NodeNavigator {
for {
node := s.Input.Select(t)
if node == nil {
return nil
}
if s.Predicate(node) {
return node
}
}
}
func (s *selfQuery) Evaluate(t iterator) interface{} {
s.Input.Evaluate(t)
return s
}
func (s *selfQuery) Test(n NodeNavigator) bool {
return s.Predicate(n)
}
func (s *selfQuery) Clone() query {
return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate}
}
// filterQuery is an XPath query for predicate filter.
type filterQuery struct {
Input query
Predicate query
posit int
positmap map[int]int
}
func (f *filterQuery) do(t iterator) bool {
val := reflect.ValueOf(f.Predicate.Evaluate(t))
switch val.Kind() {
case reflect.Bool:
return val.Bool()
case reflect.String:
return len(val.String()) > 0
case reflect.Float64:
pt := getNodePosition(f.Input)
return int(val.Float()) == pt
default:
if q, ok := f.Predicate.(query); ok {
return q.Select(t) != nil
}
}
return false
}
func (f *filterQuery) position() int {
return f.posit
}
func (f *filterQuery) Select(t iterator) NodeNavigator {
if f.positmap == nil {
f.positmap = make(map[int]int)
}
for {
node := f.Input.Select(t)
if node == nil {
return node
}
node = node.Copy()
t.Current().MoveTo(node)
if f.do(t) {
// fix https://github.com/antchfx/htmlquery/issues/26
// Calculate and keep the each of matching node's position in the same depth.
level := getNodeDepth(f.Input)
f.positmap[level]++
f.posit = f.positmap[level]
return node
}
}
}
func (f *filterQuery) Evaluate(t iterator) interface{} {
f.Input.Evaluate(t)
return f
}
func (f *filterQuery) Clone() query {
return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()}
}
// functionQuery is an XPath function that returns a computed value for
// the Evaluate call of the current NodeNavigator node. Select call isn't
// applicable for functionQuery.
type functionQuery struct {
Input query // Node Set
Func func(query, iterator) interface{} // The xpath function.
}
func (f *functionQuery) Select(t iterator) NodeNavigator {
return nil
}
// Evaluate call a specified function that will returns the
// following value type: number,string,boolean.
func (f *functionQuery) Evaluate(t iterator) interface{} {
return f.Func(f.Input, t)
}
func (f *functionQuery) Clone() query {
return &functionQuery{Input: f.Input.Clone(), Func: f.Func}
}
// transformFunctionQuery diffs from functionQuery where the latter computes a scalar
// value (number,string,boolean) for the current NodeNavigator node while the former
// (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator
// and returns a new NodeNavigator. It is used for non-scalar XPath functions such as
// reverse(), remove(), subsequence(), unordered(), etc.
type transformFunctionQuery struct {
Input query
Func func(query, iterator) func() NodeNavigator
iterator func() NodeNavigator
}
func (f *transformFunctionQuery) Select(t iterator) NodeNavigator {
if f.iterator == nil {
f.iterator = f.Func(f.Input, t)
}
return f.iterator()
}
func (f *transformFunctionQuery) Evaluate(t iterator) interface{} {
f.Input.Evaluate(t)
f.iterator = nil
return f
}
func (f *transformFunctionQuery) Clone() query {
return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func}
}
// constantQuery is an XPath constant operand.
type constantQuery struct {
Val interface{}
}
func (c *constantQuery) Select(t iterator) NodeNavigator {
return nil
}
func (c *constantQuery) Evaluate(t iterator) interface{} {
return c.Val
}
func (c *constantQuery) Clone() query {
return c
}
// logicalQuery is an XPath logical expression.
type logicalQuery struct {
Left, Right query
Do func(iterator, interface{}, interface{}) interface{}
}
func (l *logicalQuery) Select(t iterator) NodeNavigator {
// When a XPath expr is logical expression.
node := t.Current().Copy()
val := l.Evaluate(t)
switch val.(type) {
case bool:
if val.(bool) == true {
return node
}
}
return nil
}
func (l *logicalQuery) Evaluate(t iterator) interface{} {
m := l.Left.Evaluate(t)
n := l.Right.Evaluate(t)
return l.Do(t, m, n)
}
func (l *logicalQuery) Clone() query {
return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do}
}
// numericQuery is an XPath numeric operator expression.
type numericQuery struct {
Left, Right query
Do func(interface{}, interface{}) interface{}
}
func (n *numericQuery) Select(t iterator) NodeNavigator {
return nil
}
func (n *numericQuery) Evaluate(t iterator) interface{} {
m := n.Left.Evaluate(t)
k := n.Right.Evaluate(t)
return n.Do(m, k)
}
func (n *numericQuery) Clone() query {
return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do}
}
type booleanQuery struct {
IsOr bool
Left, Right query
iterator func() NodeNavigator
}
func (b *booleanQuery) Select(t iterator) NodeNavigator {
if b.iterator == nil {
var list []NodeNavigator
i := 0
root := t.Current().Copy()
if b.IsOr {
for {
node := b.Left.Select(t)
if node == nil {
break
}
node = node.Copy()
list = append(list, node)
}
t.Current().MoveTo(root)
for {
node := b.Right.Select(t)
if node == nil {
break
}
node = node.Copy()
list = append(list, node)
}
} else {
var m []NodeNavigator
var n []NodeNavigator
for {
node := b.Left.Select(t)
if node == nil {
break
}
node = node.Copy()
list = append(m, node)
}
t.Current().MoveTo(root)
for {
node := b.Right.Select(t)
if node == nil {
break
}
node = node.Copy()
list = append(n, node)
}
for _, k := range m {
for _, j := range n {
if k == j {
list = append(list, k)
}
}
}
}
b.iterator = func() NodeNavigator {
if i >= len(list) {
return nil
}
node := list[i]
i++
return node
}
}
return b.iterator()
}
func (b *booleanQuery) Evaluate(t iterator) interface{} {
m := b.Left.Evaluate(t)
left := asBool(t, m)
if b.IsOr && left {
return true
} else if !b.IsOr && !left {
return false
}
m = b.Right.Evaluate(t)
return asBool(t, m)
}
func (b *booleanQuery) Clone() query {
return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()}
}
type unionQuery struct {
Left, Right query
iterator func() NodeNavigator
}
func (u *unionQuery) Select(t iterator) NodeNavigator {
if u.iterator == nil {
var list []NodeNavigator
var m = make(map[uint64]bool)
root := t.Current().Copy()
for {
node := u.Left.Select(t)
if node == nil {
break
}
code := getHashCode(node.Copy())
if _, ok := m[code]; !ok {
m[code] = true
list = append(list, node.Copy())
}
}
t.Current().MoveTo(root)
for {
node := u.Right.Select(t)
if node == nil {
break
}
code := getHashCode(node.Copy())
if _, ok := m[code]; !ok {
m[code] = true
list = append(list, node.Copy())
}
}
var i int
u.iterator = func() NodeNavigator {
if i >= len(list) {
return nil
}
node := list[i]
i++
return node
}
}
return u.iterator()
}
func (u *unionQuery) Evaluate(t iterator) interface{} {
u.iterator = nil
u.Left.Evaluate(t)
u.Right.Evaluate(t)
return u
}
func (u *unionQuery) Clone() query {
return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()}
}
func getHashCode(n NodeNavigator) uint64 {
var sb bytes.Buffer
switch n.NodeType() {
case AttributeNode, TextNode, CommentNode:
sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value()))
// https://github.com/antchfx/htmlquery/issues/25
d := 1
for n.MoveToPrevious() {
d++
}
sb.WriteString(fmt.Sprintf("-%d", d))
for n.MoveToParent() {
d = 1
for n.MoveToPrevious() {
d++
}
sb.WriteString(fmt.Sprintf("-%d", d))
}
case ElementNode:
sb.WriteString(n.Prefix() + n.LocalName())
d := 1
for n.MoveToPrevious() {
d++
}
sb.WriteString(fmt.Sprintf("-%d", d))
for n.MoveToParent() {
d = 1
for n.MoveToPrevious() {
d++
}
sb.WriteString(fmt.Sprintf("-%d", d))
}
}
h := fnv.New64a()
h.Write([]byte(sb.String()))
return h.Sum64()
}
func getNodePosition(q query) int {
type Position interface {
position() int
}
if count, ok := q.(Position); ok {
return count.position()
}
return 1
}
func getNodeDepth(q query) int {
type Depth interface {
depth() int
}
if count, ok := q.(Depth); ok {
return count.depth()
}
return 0
}