From 2b88f7b5c8152fff016543a95fd65b38d95f2e22 Mon Sep 17 00:00:00 2001 From: Mustafa Gezen Date: Fri, 17 Feb 2023 20:00:41 +0100 Subject: [PATCH] govendor --- go.mod | 2 + peridot/proto/v1/admin/admin.proto | 1 - .../PuerkitoBio/goquery/.gitattributes | 1 - .../github.com/PuerkitoBio/goquery/.gitignore | 16 - .../PuerkitoBio/goquery/.travis.yml | 31 - .../PuerkitoBio/goquery/BUILD.bazel | 25 - vendor/github.com/PuerkitoBio/goquery/LICENSE | 12 - .../github.com/PuerkitoBio/goquery/README.md | 190 -- .../github.com/PuerkitoBio/goquery/array.go | 124 - vendor/github.com/PuerkitoBio/goquery/doc.go | 123 - .../github.com/PuerkitoBio/goquery/expand.go | 70 - .../github.com/PuerkitoBio/goquery/filter.go | 163 -- vendor/github.com/PuerkitoBio/goquery/go.mod | 8 - vendor/github.com/PuerkitoBio/goquery/go.sum | 8 - .../PuerkitoBio/goquery/iteration.go | 39 - .../PuerkitoBio/goquery/manipulation.go | 679 ----- .../PuerkitoBio/goquery/property.go | 275 -- .../github.com/PuerkitoBio/goquery/query.go | 49 - .../PuerkitoBio/goquery/traversal.go | 698 ----- vendor/github.com/PuerkitoBio/goquery/type.go | 203 -- .../PuerkitoBio/goquery/utilities.go | 171 -- .../andybalholm/cascadia/.travis.yml | 14 - .../andybalholm/cascadia/BUILD.bazel | 15 - .../github.com/andybalholm/cascadia/LICENSE | 24 - .../github.com/andybalholm/cascadia/README.md | 9 - vendor/github.com/andybalholm/cascadia/go.mod | 5 - .../github.com/andybalholm/cascadia/parser.go | 838 ------ .../andybalholm/cascadia/selector.go | 938 ------- .../andybalholm/cascadia/serialize.go | 120 - .../andybalholm/cascadia/specificity.go | 26 - .../github.com/antchfx/htmlquery/.gitignore | 32 - .../github.com/antchfx/htmlquery/.travis.yml | 16 - .../github.com/antchfx/htmlquery/BUILD.bazel | 18 - vendor/github.com/antchfx/htmlquery/LICENSE | 17 - vendor/github.com/antchfx/htmlquery/README.md | 168 -- vendor/github.com/antchfx/htmlquery/cache.go | 42 - vendor/github.com/antchfx/htmlquery/go.mod | 9 - vendor/github.com/antchfx/htmlquery/go.sum | 11 - vendor/github.com/antchfx/htmlquery/query.go | 338 --- vendor/github.com/antchfx/xmlquery/.gitignore | 32 - .../github.com/antchfx/xmlquery/.travis.yml | 17 - .../github.com/antchfx/xmlquery/BUILD.bazel | 21 - vendor/github.com/antchfx/xmlquery/LICENSE | 17 - vendor/github.com/antchfx/xmlquery/README.md | 262 -- vendor/github.com/antchfx/xmlquery/books.xml | 121 - vendor/github.com/antchfx/xmlquery/cache.go | 43 - .../antchfx/xmlquery/cached_reader.go | 69 - vendor/github.com/antchfx/xmlquery/go.mod | 9 - vendor/github.com/antchfx/xmlquery/go.sum | 14 - vendor/github.com/antchfx/xmlquery/node.go | 232 -- vendor/github.com/antchfx/xmlquery/options.go | 30 - vendor/github.com/antchfx/xmlquery/parse.go | 365 --- vendor/github.com/antchfx/xmlquery/query.go | 309 --- vendor/github.com/antchfx/xpath/.gitignore | 32 - vendor/github.com/antchfx/xpath/.travis.yml | 12 - vendor/github.com/antchfx/xpath/BUILD.bazel | 18 - vendor/github.com/antchfx/xpath/LICENSE | 17 - vendor/github.com/antchfx/xpath/README.md | 172 -- vendor/github.com/antchfx/xpath/build.go | 522 ---- vendor/github.com/antchfx/xpath/func.go | 585 ---- vendor/github.com/antchfx/xpath/func_go110.go | 16 - .../antchfx/xpath/func_pre_go110.go | 22 - vendor/github.com/antchfx/xpath/operator.go | 305 -- vendor/github.com/antchfx/xpath/parse.go | 1186 -------- vendor/github.com/antchfx/xpath/query.go | 923 ------- vendor/github.com/antchfx/xpath/xpath.go | 161 -- .../github.com/gocolly/colly/v2/.codecov.yml | 1 - .../github.com/gocolly/colly/v2/.travis.yml | 17 - .../github.com/gocolly/colly/v2/BUILD.bazel | 33 - .../github.com/gocolly/colly/v2/CHANGELOG.md | 33 - .../gocolly/colly/v2/CONTRIBUTING.md | 67 - .../github.com/gocolly/colly/v2/LICENSE.txt | 202 -- vendor/github.com/gocolly/colly/v2/README.md | 117 - vendor/github.com/gocolly/colly/v2/VERSION | 1 - vendor/github.com/gocolly/colly/v2/colly.go | 1430 ---------- vendor/github.com/gocolly/colly/v2/context.go | 87 - .../gocolly/colly/v2/debug/BUILD.bazel | 13 - .../gocolly/colly/v2/debug/debug.go | 36 - .../gocolly/colly/v2/debug/logdebugger.go | 54 - .../gocolly/colly/v2/debug/webdebugger.go | 153 - vendor/github.com/gocolly/colly/v2/go.mod | 23 - vendor/github.com/gocolly/colly/v2/go.sum | 134 - .../gocolly/colly/v2/htmlelement.go | 131 - .../gocolly/colly/v2/http_backend.go | 237 -- .../github.com/gocolly/colly/v2/http_trace.go | 37 - vendor/github.com/gocolly/colly/v2/request.go | 188 -- .../github.com/gocolly/colly/v2/response.go | 115 - .../gocolly/colly/v2/storage/BUILD.bazel | 9 - .../gocolly/colly/v2/storage/storage.go | 128 - .../github.com/gocolly/colly/v2/unmarshal.go | 218 -- .../github.com/gocolly/colly/v2/xmlelement.go | 170 -- vendor/github.com/gorilla/feeds/.travis.yml | 16 - vendor/github.com/gorilla/feeds/AUTHORS | 29 - vendor/github.com/gorilla/feeds/BUILD.bazel | 16 - vendor/github.com/gorilla/feeds/LICENSE | 22 - vendor/github.com/gorilla/feeds/README.md | 185 -- vendor/github.com/gorilla/feeds/atom.go | 169 -- vendor/github.com/gorilla/feeds/doc.go | 73 - vendor/github.com/gorilla/feeds/feed.go | 145 - vendor/github.com/gorilla/feeds/json.go | 183 -- vendor/github.com/gorilla/feeds/rss.go | 168 -- vendor/github.com/gorilla/feeds/test.atom | 92 - vendor/github.com/gorilla/feeds/test.rss | 96 - .../github.com/gorilla/feeds/to-implement.md | 20 - vendor/github.com/gorilla/feeds/uuid.go | 27 - .../github.com/kennygrant/sanitize/.gitignore | 22 - .../kennygrant/sanitize/.travis.yml | 1 - .../kennygrant/sanitize/BUILD.bazel | 10 - vendor/github.com/kennygrant/sanitize/LICENSE | 27 - .../github.com/kennygrant/sanitize/README.md | 62 - .../kennygrant/sanitize/sanitize.go | 388 --- vendor/github.com/saintfish/chardet/2022.go | 102 - vendor/github.com/saintfish/chardet/AUTHORS | 1 - .../github.com/saintfish/chardet/BUILD.bazel | 17 - vendor/github.com/saintfish/chardet/LICENSE | 22 - vendor/github.com/saintfish/chardet/README.md | 10 - .../github.com/saintfish/chardet/detector.go | 136 - .../saintfish/chardet/icu-license.html | 51 - .../saintfish/chardet/multi_byte.go | 345 --- .../saintfish/chardet/recognizer.go | 83 - .../saintfish/chardet/single_byte.go | 882 ------ .../github.com/saintfish/chardet/unicode.go | 103 - vendor/github.com/saintfish/chardet/utf8.go | 71 - vendor/github.com/temoto/robotstxt/.gitignore | 15 - .../github.com/temoto/robotstxt/.golangci.yml | 20 - .../github.com/temoto/robotstxt/.travis.yml | 30 - .../github.com/temoto/robotstxt/BUILD.bazel | 13 - vendor/github.com/temoto/robotstxt/LICENSE | 21 - vendor/github.com/temoto/robotstxt/README.rst | 115 - .../github.com/temoto/robotstxt/codecov.yml | 2 - vendor/github.com/temoto/robotstxt/fuzz.go | 29 - vendor/github.com/temoto/robotstxt/go.mod | 5 - vendor/github.com/temoto/robotstxt/go.sum | 7 - vendor/github.com/temoto/robotstxt/parser.go | 271 -- .../github.com/temoto/robotstxt/robotstxt.go | 227 -- vendor/github.com/temoto/robotstxt/scanner.go | 185 -- .../go.temporal.io/sdk/testsuite/BUILD.bazel | 10 - .../go.temporal.io/sdk/testsuite/testsuite.go | 47 - vendor/golang.org/x/net/html/atom/atom.go | 78 - vendor/golang.org/x/net/html/atom/table.go | 783 ------ .../golang.org/x/net/html/charset/charset.go | 257 -- vendor/golang.org/x/net/html/const.go | 111 - vendor/golang.org/x/net/html/doc.go | 106 - vendor/golang.org/x/net/html/doctype.go | 156 -- vendor/golang.org/x/net/html/entity.go | 2253 --------------- vendor/golang.org/x/net/html/escape.go | 258 -- vendor/golang.org/x/net/html/foreign.go | 222 -- vendor/golang.org/x/net/html/node.go | 225 -- vendor/golang.org/x/net/html/parse.go | 2460 ----------------- vendor/golang.org/x/net/html/render.go | 273 -- vendor/golang.org/x/net/html/token.go | 1224 -------- vendor/modules.txt | 29 +- .../peridotopenapi/README.md | 2 + .../peridotopenapi/model_v1_task_type.go | 3 +- 154 files changed, 10 insertions(+), 27675 deletions(-) delete mode 100644 vendor/github.com/PuerkitoBio/goquery/.gitattributes delete mode 100644 vendor/github.com/PuerkitoBio/goquery/.gitignore delete mode 100644 vendor/github.com/PuerkitoBio/goquery/.travis.yml delete mode 100644 vendor/github.com/PuerkitoBio/goquery/BUILD.bazel delete mode 100644 vendor/github.com/PuerkitoBio/goquery/LICENSE delete mode 100644 vendor/github.com/PuerkitoBio/goquery/README.md delete mode 100644 vendor/github.com/PuerkitoBio/goquery/array.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/doc.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/expand.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/filter.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/go.mod delete mode 100644 vendor/github.com/PuerkitoBio/goquery/go.sum delete mode 100644 vendor/github.com/PuerkitoBio/goquery/iteration.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/manipulation.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/property.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/query.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/traversal.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/type.go delete mode 100644 vendor/github.com/PuerkitoBio/goquery/utilities.go delete mode 100644 vendor/github.com/andybalholm/cascadia/.travis.yml delete mode 100644 vendor/github.com/andybalholm/cascadia/BUILD.bazel delete mode 100644 vendor/github.com/andybalholm/cascadia/LICENSE delete mode 100644 vendor/github.com/andybalholm/cascadia/README.md delete mode 100644 vendor/github.com/andybalholm/cascadia/go.mod delete mode 100644 vendor/github.com/andybalholm/cascadia/parser.go delete mode 100644 vendor/github.com/andybalholm/cascadia/selector.go delete mode 100644 vendor/github.com/andybalholm/cascadia/serialize.go delete mode 100644 vendor/github.com/andybalholm/cascadia/specificity.go delete mode 100644 vendor/github.com/antchfx/htmlquery/.gitignore delete mode 100644 vendor/github.com/antchfx/htmlquery/.travis.yml delete mode 100644 vendor/github.com/antchfx/htmlquery/BUILD.bazel delete mode 100644 vendor/github.com/antchfx/htmlquery/LICENSE delete mode 100644 vendor/github.com/antchfx/htmlquery/README.md delete mode 100644 vendor/github.com/antchfx/htmlquery/cache.go delete mode 100644 vendor/github.com/antchfx/htmlquery/go.mod delete mode 100644 vendor/github.com/antchfx/htmlquery/go.sum delete mode 100644 vendor/github.com/antchfx/htmlquery/query.go delete mode 100644 vendor/github.com/antchfx/xmlquery/.gitignore delete mode 100644 vendor/github.com/antchfx/xmlquery/.travis.yml delete mode 100644 vendor/github.com/antchfx/xmlquery/BUILD.bazel delete mode 100644 vendor/github.com/antchfx/xmlquery/LICENSE delete mode 100644 vendor/github.com/antchfx/xmlquery/README.md delete mode 100644 vendor/github.com/antchfx/xmlquery/books.xml delete mode 100644 vendor/github.com/antchfx/xmlquery/cache.go delete mode 100644 vendor/github.com/antchfx/xmlquery/cached_reader.go delete mode 100644 vendor/github.com/antchfx/xmlquery/go.mod delete mode 100644 vendor/github.com/antchfx/xmlquery/go.sum delete mode 100644 vendor/github.com/antchfx/xmlquery/node.go delete mode 100644 vendor/github.com/antchfx/xmlquery/options.go delete mode 100644 vendor/github.com/antchfx/xmlquery/parse.go delete mode 100644 vendor/github.com/antchfx/xmlquery/query.go delete mode 100644 vendor/github.com/antchfx/xpath/.gitignore delete mode 100644 vendor/github.com/antchfx/xpath/.travis.yml delete mode 100644 vendor/github.com/antchfx/xpath/BUILD.bazel delete mode 100644 vendor/github.com/antchfx/xpath/LICENSE delete mode 100644 vendor/github.com/antchfx/xpath/README.md delete mode 100644 vendor/github.com/antchfx/xpath/build.go delete mode 100644 vendor/github.com/antchfx/xpath/func.go delete mode 100644 vendor/github.com/antchfx/xpath/func_go110.go delete mode 100644 vendor/github.com/antchfx/xpath/func_pre_go110.go delete mode 100644 vendor/github.com/antchfx/xpath/operator.go delete mode 100644 vendor/github.com/antchfx/xpath/parse.go delete mode 100644 vendor/github.com/antchfx/xpath/query.go delete mode 100644 vendor/github.com/antchfx/xpath/xpath.go delete mode 100644 vendor/github.com/gocolly/colly/v2/.codecov.yml delete mode 100644 vendor/github.com/gocolly/colly/v2/.travis.yml delete mode 100644 vendor/github.com/gocolly/colly/v2/BUILD.bazel delete mode 100644 vendor/github.com/gocolly/colly/v2/CHANGELOG.md delete mode 100644 vendor/github.com/gocolly/colly/v2/CONTRIBUTING.md delete mode 100644 vendor/github.com/gocolly/colly/v2/LICENSE.txt delete mode 100644 vendor/github.com/gocolly/colly/v2/README.md delete mode 100644 vendor/github.com/gocolly/colly/v2/VERSION delete mode 100644 vendor/github.com/gocolly/colly/v2/colly.go delete mode 100644 vendor/github.com/gocolly/colly/v2/context.go delete mode 100644 vendor/github.com/gocolly/colly/v2/debug/BUILD.bazel delete mode 100644 vendor/github.com/gocolly/colly/v2/debug/debug.go delete mode 100644 vendor/github.com/gocolly/colly/v2/debug/logdebugger.go delete mode 100644 vendor/github.com/gocolly/colly/v2/debug/webdebugger.go delete mode 100644 vendor/github.com/gocolly/colly/v2/go.mod delete mode 100644 vendor/github.com/gocolly/colly/v2/go.sum delete mode 100644 vendor/github.com/gocolly/colly/v2/htmlelement.go delete mode 100644 vendor/github.com/gocolly/colly/v2/http_backend.go delete mode 100644 vendor/github.com/gocolly/colly/v2/http_trace.go delete mode 100644 vendor/github.com/gocolly/colly/v2/request.go delete mode 100644 vendor/github.com/gocolly/colly/v2/response.go delete mode 100644 vendor/github.com/gocolly/colly/v2/storage/BUILD.bazel delete mode 100644 vendor/github.com/gocolly/colly/v2/storage/storage.go delete mode 100644 vendor/github.com/gocolly/colly/v2/unmarshal.go delete mode 100644 vendor/github.com/gocolly/colly/v2/xmlelement.go delete mode 100644 vendor/github.com/gorilla/feeds/.travis.yml delete mode 100644 vendor/github.com/gorilla/feeds/AUTHORS delete mode 100644 vendor/github.com/gorilla/feeds/BUILD.bazel delete mode 100644 vendor/github.com/gorilla/feeds/LICENSE delete mode 100644 vendor/github.com/gorilla/feeds/README.md delete mode 100644 vendor/github.com/gorilla/feeds/atom.go delete mode 100644 vendor/github.com/gorilla/feeds/doc.go delete mode 100644 vendor/github.com/gorilla/feeds/feed.go delete mode 100644 vendor/github.com/gorilla/feeds/json.go delete mode 100644 vendor/github.com/gorilla/feeds/rss.go delete mode 100644 vendor/github.com/gorilla/feeds/test.atom delete mode 100644 vendor/github.com/gorilla/feeds/test.rss delete mode 100644 vendor/github.com/gorilla/feeds/to-implement.md delete mode 100644 vendor/github.com/gorilla/feeds/uuid.go delete mode 100644 vendor/github.com/kennygrant/sanitize/.gitignore delete mode 100644 vendor/github.com/kennygrant/sanitize/.travis.yml delete mode 100644 vendor/github.com/kennygrant/sanitize/BUILD.bazel delete mode 100644 vendor/github.com/kennygrant/sanitize/LICENSE delete mode 100644 vendor/github.com/kennygrant/sanitize/README.md delete mode 100644 vendor/github.com/kennygrant/sanitize/sanitize.go delete mode 100644 vendor/github.com/saintfish/chardet/2022.go delete mode 100644 vendor/github.com/saintfish/chardet/AUTHORS delete mode 100644 vendor/github.com/saintfish/chardet/BUILD.bazel delete mode 100644 vendor/github.com/saintfish/chardet/LICENSE delete mode 100644 vendor/github.com/saintfish/chardet/README.md delete mode 100644 vendor/github.com/saintfish/chardet/detector.go delete mode 100644 vendor/github.com/saintfish/chardet/icu-license.html delete mode 100644 vendor/github.com/saintfish/chardet/multi_byte.go delete mode 100644 vendor/github.com/saintfish/chardet/recognizer.go delete mode 100644 vendor/github.com/saintfish/chardet/single_byte.go delete mode 100644 vendor/github.com/saintfish/chardet/unicode.go delete mode 100644 vendor/github.com/saintfish/chardet/utf8.go delete mode 100644 vendor/github.com/temoto/robotstxt/.gitignore delete mode 100644 vendor/github.com/temoto/robotstxt/.golangci.yml delete mode 100644 vendor/github.com/temoto/robotstxt/.travis.yml delete mode 100644 vendor/github.com/temoto/robotstxt/BUILD.bazel delete mode 100644 vendor/github.com/temoto/robotstxt/LICENSE delete mode 100644 vendor/github.com/temoto/robotstxt/README.rst delete mode 100644 vendor/github.com/temoto/robotstxt/codecov.yml delete mode 100644 vendor/github.com/temoto/robotstxt/fuzz.go delete mode 100644 vendor/github.com/temoto/robotstxt/go.mod delete mode 100644 vendor/github.com/temoto/robotstxt/go.sum delete mode 100644 vendor/github.com/temoto/robotstxt/parser.go delete mode 100644 vendor/github.com/temoto/robotstxt/robotstxt.go delete mode 100644 vendor/github.com/temoto/robotstxt/scanner.go delete mode 100644 vendor/go.temporal.io/sdk/testsuite/BUILD.bazel delete mode 100644 vendor/go.temporal.io/sdk/testsuite/testsuite.go delete mode 100644 vendor/golang.org/x/net/html/atom/atom.go delete mode 100644 vendor/golang.org/x/net/html/atom/table.go delete mode 100644 vendor/golang.org/x/net/html/charset/charset.go delete mode 100644 vendor/golang.org/x/net/html/const.go delete mode 100644 vendor/golang.org/x/net/html/doc.go delete mode 100644 vendor/golang.org/x/net/html/doctype.go delete mode 100644 vendor/golang.org/x/net/html/entity.go delete mode 100644 vendor/golang.org/x/net/html/escape.go delete mode 100644 vendor/golang.org/x/net/html/foreign.go delete mode 100644 vendor/golang.org/x/net/html/node.go delete mode 100644 vendor/golang.org/x/net/html/parse.go delete mode 100644 vendor/golang.org/x/net/html/render.go delete mode 100644 vendor/golang.org/x/net/html/token.go diff --git a/go.mod b/go.mod index d7ce628..0b65ad5 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( openapi.peridot.resf.org/peridotopenapi v0.0.0-00010101000000-000000000000 peridot.resf.org/common v0.0.0-00010101000000-000000000000 peridot.resf.org/obsidian/pb v0.0.0-00010101000000-000000000000 + peridot.resf.org/peridot/admin/pb v0.0.0-00010101000000-000000000000 // indirect peridot.resf.org/peridot/keykeeper/pb v0.0.0-00010101000000-000000000000 peridot.resf.org/peridot/pb v0.0.0-00010101000000-000000000000 peridot.resf.org/peridot/yumrepofs/pb v0.0.0-00010101000000-000000000000 @@ -80,6 +81,7 @@ replace ( bazel.build/semver => ./bazel-bin/build/bazel/semver/semver_go_proto_/bazel.build/semver peridot.resf.org/obsidian/pb => ./bazel-bin/obsidian/proto/v1/obsidianpb_go_proto_/peridot.resf.org/obsidian/pb peridot.resf.org/peridot/pb => ./bazel-bin/peridot/proto/v1/peridotpb_go_proto_/peridot.resf.org/peridot/pb + peridot.resf.org/peridot/admin/pb => ./bazel-bin/peridot/proto/v1/admin/adminpb_go_proto_/peridot.resf.org/peridot/admin/pb peridot.resf.org/peridot/keykeeper/pb => ./bazel-bin/peridot/proto/v1/keykeeper/keykeeperpb_go_proto_/peridot.resf.org/peridot/keykeeper/pb peridot.resf.org/peridot/yumrepofs/pb => ./bazel-bin/peridot/proto/v1/yumrepofs/yumrepofspb_go_proto_/peridot.resf.org/peridot/yumrepofs/pb peridot.resf.org/common => ./bazel-bin/proto/commonpb_go_proto_/peridot.resf.org/common diff --git a/peridot/proto/v1/admin/admin.proto b/peridot/proto/v1/admin/admin.proto index ccffe83..2253e46 100644 --- a/peridot/proto/v1/admin/admin.proto +++ b/peridot/proto/v1/admin/admin.proto @@ -4,7 +4,6 @@ package resf.peridot.admin.v1; import "google/api/annotations.proto"; import "validate/validate.proto"; -import "apollo/proto/v1/advisory.proto"; import "peridot/proto/v1/task.proto"; option go_package = "peridot.resf.org/peridot/admin/pb;adminpb"; diff --git a/vendor/github.com/PuerkitoBio/goquery/.gitattributes b/vendor/github.com/PuerkitoBio/goquery/.gitattributes deleted file mode 100644 index 0cc26ec..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -testdata/* linguist-vendored diff --git a/vendor/github.com/PuerkitoBio/goquery/.gitignore b/vendor/github.com/PuerkitoBio/goquery/.gitignore deleted file mode 100644 index 970381c..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -# editor temporary files -*.sublime-* -.DS_Store -*.swp -#*.*# -tags - -# direnv config -.env* - -# test binaries -*.test - -# coverage and profilte outputs -*.out - diff --git a/vendor/github.com/PuerkitoBio/goquery/.travis.yml b/vendor/github.com/PuerkitoBio/goquery/.travis.yml deleted file mode 100644 index 8430c86..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/.travis.yml +++ /dev/null @@ -1,31 +0,0 @@ -arch: - - amd64 - - ppc64le -language: go - -go: - - 1.7.x - - 1.8.x - - 1.9.x - - 1.10.x - - 1.11.x - - 1.12.x - - 1.13.x - - 1.14.x - - 1.15.x - - tip - -jobs: - exclude: - - arch: ppc64le - go: 1.7.x - - arch: ppc64le - go: 1.8.x - - arch: ppc64le - go: 1.9.x - - arch: ppc64le - go: 1.10.x - - arch: ppc64le - go: 1.11.x - - arch: ppc64le - go: 1.12.x diff --git a/vendor/github.com/PuerkitoBio/goquery/BUILD.bazel b/vendor/github.com/PuerkitoBio/goquery/BUILD.bazel deleted file mode 100644 index 1c4addf..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/BUILD.bazel +++ /dev/null @@ -1,25 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "goquery", - srcs = [ - "array.go", - "doc.go", - "expand.go", - "filter.go", - "iteration.go", - "manipulation.go", - "property.go", - "query.go", - "traversal.go", - "type.go", - "utilities.go", - ], - importmap = "peridot.resf.org/vendor/github.com/PuerkitoBio/goquery", - importpath = "github.com/PuerkitoBio/goquery", - visibility = ["//visibility:public"], - deps = [ - "//vendor/github.com/andybalholm/cascadia", - "@org_golang_x_net//html", - ], -) diff --git a/vendor/github.com/PuerkitoBio/goquery/LICENSE b/vendor/github.com/PuerkitoBio/goquery/LICENSE deleted file mode 100644 index 25372c2..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/LICENSE +++ /dev/null @@ -1,12 +0,0 @@ -Copyright (c) 2012-2021, Martin Angers & Contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - -* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/PuerkitoBio/goquery/README.md b/vendor/github.com/PuerkitoBio/goquery/README.md deleted file mode 100644 index 924cef7..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/README.md +++ /dev/null @@ -1,190 +0,0 @@ -# goquery - a little like that j-thing, only in Go -[![builds.sr.ht status](https://builds.sr.ht/~mna/goquery/commits/fedora.yml.svg)](https://builds.sr.ht/~mna/goquery/commits/fedora.yml?) [![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![Go Reference](https://pkg.go.dev/badge/github.com/PuerkitoBio/goquery.svg)](https://pkg.go.dev/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge) - -goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off. - -Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. See the [wiki][] for various options to do this. - -Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that writing a similar HTML-manipulating library was better to follow its API than to start anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...). - -## Table of Contents - -* [Installation](#installation) -* [Changelog](#changelog) -* [API](#api) -* [Examples](#examples) -* [Related Projects](#related-projects) -* [Support](#support) -* [License](#license) - -## Installation - -Please note that because of the net/html dependency, goquery requires Go1.1+ and is tested on Go1.7+. - - $ go get github.com/PuerkitoBio/goquery - -(optional) To run unit tests: - - $ cd $GOPATH/src/github.com/PuerkitoBio/goquery - $ go test - -(optional) To run benchmarks (warning: it runs for a few minutes): - - $ cd $GOPATH/src/github.com/PuerkitoBio/goquery - $ go test -bench=".*" - -## Changelog - -**Note that goquery's API is now stable, and will not break.** - -* **2021-06-14 (v1.7.0)** : Add `Single` and `SingleMatcher` functions to optimize first-match selection (thanks [@gdollardollar](https://github.com/gdollardollar)). -* **2021-01-11 (v1.6.1)** : Fix panic when calling `{Prepend,Append,Set}Html` on a `Selection` that contains non-Element nodes. -* **2020-10-08 (v1.6.0)** : Parse html in context of the container node for all functions that deal with html strings (`AfterHtml`, `AppendHtml`, etc.). Thanks to [@thiemok][thiemok] and [@davidjwilkins][djw] for their work on this. -* **2020-02-04 (v1.5.1)** : Update module dependencies. -* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505). -* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples. -* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`. -* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue). -* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins). -* **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv). -* **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb). -* **2016-08-28 (v1.0.1)** : Optimize performance for large documents. -* **2016-07-27 (v1.0.0)** : Tag version 1.0.0. -* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object. -* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see [doc][] for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`). -* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr]. -* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone]. -* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone]. -* **2014-11-07** : Add manipulation functions (thanks to [Andrew Stone][thatguystone]) and `*Matcher` functions, that receive compiled cascadia selectors instead of selector strings, thus avoiding potential panics thrown by goquery via `cascadia.MustCompile` calls. This results in better performance (selectors can be compiled once and reused) and more idiomatic error handling (you can handle cascadia's compilation errors, instead of recovering from panics, which had been bugging me for a long time). Note that the actual type expected is a `Matcher` interface, that `cascadia.Selector` implements. Other matcher implementations could be used. -* **2014-11-06** : Change import paths of net/html to golang.org/x/net/html (see https://groups.google.com/forum/#!topic/golang-nuts/eD8dh3T9yyA). Make sure to update your code to use the new import path too when you call goquery with `html.Node`s. -* **v0.3.2** : Add `NewDocumentFromReader()` (thanks jweir) which allows creating a goquery document from an io.Reader. -* **v0.3.1** : Add `NewDocumentFromResponse()` (thanks assassingj) which allows creating a goquery document from an http response. -* **v0.3.0** : Add `EachWithBreak()` which allows to break out of an `Each()` loop by returning false. This function was added instead of changing the existing `Each()` to avoid breaking compatibility. -* **v0.2.1** : Make go-getable, now that [go.net/html is Go1.0-compatible][gonet] (thanks to @matrixik for pointing this out). -* **v0.2.0** : Add support for negative indices in Slice(). **BREAKING CHANGE** `Document.Root` is removed, `Document` is now a `Selection` itself (a selection of one, the root element, just like `Document.Root` was before). Add jQuery's Closest() method. -* **v0.1.1** : Add benchmarks to use as baseline for refactorings, refactor Next...() and Prev...() methods to use the new html package's linked list features (Next/PrevSibling, FirstChild). Good performance boost (40+% in some cases). -* **v0.1.0** : Initial release. - -## API - -goquery exposes two structs, `Document` and `Selection`, and the `Matcher` interface. Unlike jQuery, which is loaded as part of a DOM document, and thus acts on its containing document, goquery doesn't know which HTML document to act upon. So it needs to be told, and that's what the `Document` type is for. It holds the root document node as the initial Selection value to manipulate. - -jQuery often has many variants for the same function (no argument, a selector string argument, a jQuery object argument, a DOM element argument, ...). Instead of exposing the same features in goquery as a single method with variadic empty interface arguments, statically-typed signatures are used following this naming convention: - -* When the jQuery equivalent can be called with no argument, it has the same name as jQuery for the no argument signature (e.g.: `Prev()`), and the version with a selector string argument is called `XxxFiltered()` (e.g.: `PrevFiltered()`) -* When the jQuery equivalent **requires** one argument, the same name as jQuery is used for the selector string version (e.g.: `Is()`) -* The signatures accepting a jQuery object as argument are defined in goquery as `XxxSelection()` and take a `*Selection` object as argument (e.g.: `FilterSelection()`) -* The signatures accepting a DOM element as argument in jQuery are defined in goquery as `XxxNodes()` and take a variadic argument of type `*html.Node` (e.g.: `FilterNodes()`) -* The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`) -* The goquery methods that can be called with a selector string have a corresponding version that take a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`) - -Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour). - -The complete [package reference documentation can be found here][doc]. - -Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string): - -* `Find("~")` returns an empty selection because the selector string doesn't match anything. -* `Add("~")` returns a new selection that holds the same nodes as the original selection, because it didn't add any node (selector string didn't match anything). -* `ParentsFiltered("~")` returns an empty selection because the selector string doesn't match anything. -* `ParentsUntil("~")` returns all parents of the selection because the selector string didn't match any element to stop before the top element. - -## Examples - -See some tips and tricks in the [wiki][]. - -Adapted from example_test.go: - -```Go -package main - -import ( - "fmt" - "log" - "net/http" - - "github.com/PuerkitoBio/goquery" -) - -func ExampleScrape() { - // Request the HTML page. - res, err := http.Get("http://metalsucks.net") - if err != nil { - log.Fatal(err) - } - defer res.Body.Close() - if res.StatusCode != 200 { - log.Fatalf("status code error: %d %s", res.StatusCode, res.Status) - } - - // Load the HTML document - doc, err := goquery.NewDocumentFromReader(res.Body) - if err != nil { - log.Fatal(err) - } - - // Find the review items - doc.Find(".left-content article .post-title").Each(func(i int, s *goquery.Selection) { - // For each item found, get the title - title := s.Find("a").Text() - fmt.Printf("Review %d: %s\n", i, title) - }) -} - -func main() { - ExampleScrape() -} -``` - -## Related Projects - -- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags. -- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery. -- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors. -- [gocolly/colly](https://github.com/gocolly/colly), a lightning fast and elegant Scraping Framework -- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets. -- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping. -- [tacusci/berrycms](https://github.com/tacusci/berrycms), a modern simple to use CMS with easy to write plugins -- [Dataflow kit](https://github.com/slotix/dataflowkit), Web Scraping framework for Gophers. -- [Geziyor](https://github.com/geziyor/geziyor), a fast web crawling & scraping framework for Go. Supports JS rendering. -- [Pagser](https://github.com/foolin/pagser), a simple, easy, extensible, configurable HTML parser to struct based on goquery and struct tags. -- [stitcherd](https://github.com/vhodges/stitcherd), A server for doing server side includes using css selectors and DOM updates. - -## Support - -There are a number of ways you can support the project: - -* Use it, star it, build something with it, spread the word! - - If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section! -* Raise issues to improve the project (note: doc typos and clarifications are issues too!) - - Please search existing issues before opening a new one - it may have already been adressed. -* Pull requests: please discuss new code in an issue first, unless the fix is really trivial. - - Make sure new code is tested. - - Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue. -* Sponsor the developer - - See the Github Sponsor button at the top of the repo on github - - or via BuyMeACoffee.com, below - -Buy Me A Coffee - -## License - -The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic]. - -[jquery]: http://jquery.com/ -[go]: http://golang.org/ -[cascadia]: https://github.com/andybalholm/cascadia -[cascadiacli]: https://github.com/suntong/cascadia -[bsd]: http://opensource.org/licenses/BSD-3-Clause -[golic]: http://golang.org/LICENSE -[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE -[doc]: https://pkg.go.dev/github.com/PuerkitoBio/goquery -[index]: http://api.jquery.com/index/ -[gonet]: https://github.com/golang/net/ -[html]: https://pkg.go.dev/golang.org/x/net/html -[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks -[thatguystone]: https://github.com/thatguystone -[piotr]: https://github.com/piotrkowalczuk -[goq]: https://github.com/andrewstuart/goq -[thiemok]: https://github.com/thiemok -[djw]: https://github.com/davidjwilkins diff --git a/vendor/github.com/PuerkitoBio/goquery/array.go b/vendor/github.com/PuerkitoBio/goquery/array.go deleted file mode 100644 index 1b1f6cb..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/array.go +++ /dev/null @@ -1,124 +0,0 @@ -package goquery - -import ( - "golang.org/x/net/html" -) - -const ( - maxUint = ^uint(0) - maxInt = int(maxUint >> 1) - - // ToEnd is a special index value that can be used as end index in a call - // to Slice so that all elements are selected until the end of the Selection. - // It is equivalent to passing (*Selection).Length(). - ToEnd = maxInt -) - -// First reduces the set of matched elements to the first in the set. -// It returns a new Selection object, and an empty Selection object if the -// the selection is empty. -func (s *Selection) First() *Selection { - return s.Eq(0) -} - -// Last reduces the set of matched elements to the last in the set. -// It returns a new Selection object, and an empty Selection object if -// the selection is empty. -func (s *Selection) Last() *Selection { - return s.Eq(-1) -} - -// Eq reduces the set of matched elements to the one at the specified index. -// If a negative index is given, it counts backwards starting at the end of the -// set. It returns a new Selection object, and an empty Selection object if the -// index is invalid. -func (s *Selection) Eq(index int) *Selection { - if index < 0 { - index += len(s.Nodes) - } - - if index >= len(s.Nodes) || index < 0 { - return newEmptySelection(s.document) - } - - return s.Slice(index, index+1) -} - -// Slice reduces the set of matched elements to a subset specified by a range -// of indices. The start index is 0-based and indicates the index of the first -// element to select. The end index is 0-based and indicates the index at which -// the elements stop being selected (the end index is not selected). -// -// The indices may be negative, in which case they represent an offset from the -// end of the selection. -// -// The special value ToEnd may be specified as end index, in which case all elements -// until the end are selected. This works both for a positive and negative start -// index. -func (s *Selection) Slice(start, end int) *Selection { - if start < 0 { - start += len(s.Nodes) - } - if end == ToEnd { - end = len(s.Nodes) - } else if end < 0 { - end += len(s.Nodes) - } - return pushStack(s, s.Nodes[start:end]) -} - -// Get retrieves the underlying node at the specified index. -// Get without parameter is not implemented, since the node array is available -// on the Selection object. -func (s *Selection) Get(index int) *html.Node { - if index < 0 { - index += len(s.Nodes) // Negative index gets from the end - } - return s.Nodes[index] -} - -// Index returns the position of the first element within the Selection object -// relative to its sibling elements. -func (s *Selection) Index() int { - if len(s.Nodes) > 0 { - return newSingleSelection(s.Nodes[0], s.document).PrevAll().Length() - } - return -1 -} - -// IndexSelector returns the position of the first element within the -// Selection object relative to the elements matched by the selector, or -1 if -// not found. -func (s *Selection) IndexSelector(selector string) int { - if len(s.Nodes) > 0 { - sel := s.document.Find(selector) - return indexInSlice(sel.Nodes, s.Nodes[0]) - } - return -1 -} - -// IndexMatcher returns the position of the first element within the -// Selection object relative to the elements matched by the matcher, or -1 if -// not found. -func (s *Selection) IndexMatcher(m Matcher) int { - if len(s.Nodes) > 0 { - sel := s.document.FindMatcher(m) - return indexInSlice(sel.Nodes, s.Nodes[0]) - } - return -1 -} - -// IndexOfNode returns the position of the specified node within the Selection -// object, or -1 if not found. -func (s *Selection) IndexOfNode(node *html.Node) int { - return indexInSlice(s.Nodes, node) -} - -// IndexOfSelection returns the position of the first node in the specified -// Selection object within this Selection object, or -1 if not found. -func (s *Selection) IndexOfSelection(sel *Selection) int { - if sel != nil && len(sel.Nodes) > 0 { - return indexInSlice(s.Nodes, sel.Nodes[0]) - } - return -1 -} diff --git a/vendor/github.com/PuerkitoBio/goquery/doc.go b/vendor/github.com/PuerkitoBio/goquery/doc.go deleted file mode 100644 index 71146a7..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/doc.go +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2012-2016, Martin Angers & Contributors -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation and/or -// other materials provided with the distribution. -// * Neither the name of the author nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS -// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -// AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/* -Package goquery implements features similar to jQuery, including the chainable -syntax, to manipulate and query an HTML document. - -It brings a syntax and a set of features similar to jQuery to the Go language. -It is based on Go's net/html package and the CSS Selector library cascadia. -Since the net/html parser returns nodes, and not a full-featured DOM -tree, jQuery's stateful manipulation functions (like height(), css(), detach()) -have been left off. - -Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is -the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. -See the repository's wiki for various options on how to do this. - -Syntax-wise, it is as close as possible to jQuery, with the same method names when -possible, and that warm and fuzzy chainable interface. jQuery being the -ultra-popular library that it is, writing a similar HTML-manipulating -library was better to follow its API than to start anew (in the same spirit as -Go's fmt package), even though some of its methods are less than intuitive (looking -at you, index()...). - -It is hosted on GitHub, along with additional documentation in the README.md -file: https://github.com/puerkitobio/goquery - -Please note that because of the net/html dependency, goquery requires Go1.1+. - -The various methods are split into files based on the category of behavior. -The three dots (...) indicate that various "overloads" are available. - -* array.go : array-like positional manipulation of the selection. - - Eq() - - First() - - Get() - - Index...() - - Last() - - Slice() - -* expand.go : methods that expand or augment the selection's set. - - Add...() - - AndSelf() - - Union(), which is an alias for AddSelection() - -* filter.go : filtering methods, that reduce the selection's set. - - End() - - Filter...() - - Has...() - - Intersection(), which is an alias of FilterSelection() - - Not...() - -* iteration.go : methods to loop over the selection's nodes. - - Each() - - EachWithBreak() - - Map() - -* manipulation.go : methods for modifying the document - - After...() - - Append...() - - Before...() - - Clone() - - Empty() - - Prepend...() - - Remove...() - - ReplaceWith...() - - Unwrap() - - Wrap...() - - WrapAll...() - - WrapInner...() - -* property.go : methods that inspect and get the node's properties values. - - Attr*(), RemoveAttr(), SetAttr() - - AddClass(), HasClass(), RemoveClass(), ToggleClass() - - Html() - - Length() - - Size(), which is an alias for Length() - - Text() - -* query.go : methods that query, or reflect, a node's identity. - - Contains() - - Is...() - -* traversal.go : methods to traverse the HTML document tree. - - Children...() - - Contents() - - Find...() - - Next...() - - Parent[s]...() - - Prev...() - - Siblings...() - -* type.go : definition of the types exposed by goquery. - - Document - - Selection - - Matcher - -* utilities.go : definition of helper functions (and not methods on a *Selection) -that are not part of jQuery, but are useful to goquery. - - NodeName - - OuterHtml -*/ -package goquery diff --git a/vendor/github.com/PuerkitoBio/goquery/expand.go b/vendor/github.com/PuerkitoBio/goquery/expand.go deleted file mode 100644 index 7caade5..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/expand.go +++ /dev/null @@ -1,70 +0,0 @@ -package goquery - -import "golang.org/x/net/html" - -// Add adds the selector string's matching nodes to those in the current -// selection and returns a new Selection object. -// The selector string is run in the context of the document of the current -// Selection object. -func (s *Selection) Add(selector string) *Selection { - return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, compileMatcher(selector))...) -} - -// AddMatcher adds the matcher's matching nodes to those in the current -// selection and returns a new Selection object. -// The matcher is run in the context of the document of the current -// Selection object. -func (s *Selection) AddMatcher(m Matcher) *Selection { - return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, m)...) -} - -// AddSelection adds the specified Selection object's nodes to those in the -// current selection and returns a new Selection object. -func (s *Selection) AddSelection(sel *Selection) *Selection { - if sel == nil { - return s.AddNodes() - } - return s.AddNodes(sel.Nodes...) -} - -// Union is an alias for AddSelection. -func (s *Selection) Union(sel *Selection) *Selection { - return s.AddSelection(sel) -} - -// AddNodes adds the specified nodes to those in the -// current selection and returns a new Selection object. -func (s *Selection) AddNodes(nodes ...*html.Node) *Selection { - return pushStack(s, appendWithoutDuplicates(s.Nodes, nodes, nil)) -} - -// AndSelf adds the previous set of elements on the stack to the current set. -// It returns a new Selection object containing the current Selection combined -// with the previous one. -// Deprecated: This function has been deprecated and is now an alias for AddBack(). -func (s *Selection) AndSelf() *Selection { - return s.AddBack() -} - -// AddBack adds the previous set of elements on the stack to the current set. -// It returns a new Selection object containing the current Selection combined -// with the previous one. -func (s *Selection) AddBack() *Selection { - return s.AddSelection(s.prevSel) -} - -// AddBackFiltered reduces the previous set of elements on the stack to those that -// match the selector string, and adds them to the current set. -// It returns a new Selection object containing the current Selection combined -// with the filtered previous one -func (s *Selection) AddBackFiltered(selector string) *Selection { - return s.AddSelection(s.prevSel.Filter(selector)) -} - -// AddBackMatcher reduces the previous set of elements on the stack to those that match -// the mateher, and adds them to the curernt set. -// It returns a new Selection object containing the current Selection combined -// with the filtered previous one -func (s *Selection) AddBackMatcher(m Matcher) *Selection { - return s.AddSelection(s.prevSel.FilterMatcher(m)) -} diff --git a/vendor/github.com/PuerkitoBio/goquery/filter.go b/vendor/github.com/PuerkitoBio/goquery/filter.go deleted file mode 100644 index 9138ffb..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/filter.go +++ /dev/null @@ -1,163 +0,0 @@ -package goquery - -import "golang.org/x/net/html" - -// Filter reduces the set of matched elements to those that match the selector string. -// It returns a new Selection object for this subset of matching elements. -func (s *Selection) Filter(selector string) *Selection { - return s.FilterMatcher(compileMatcher(selector)) -} - -// FilterMatcher reduces the set of matched elements to those that match -// the given matcher. It returns a new Selection object for this subset -// of matching elements. -func (s *Selection) FilterMatcher(m Matcher) *Selection { - return pushStack(s, winnow(s, m, true)) -} - -// Not removes elements from the Selection that match the selector string. -// It returns a new Selection object with the matching elements removed. -func (s *Selection) Not(selector string) *Selection { - return s.NotMatcher(compileMatcher(selector)) -} - -// NotMatcher removes elements from the Selection that match the given matcher. -// It returns a new Selection object with the matching elements removed. -func (s *Selection) NotMatcher(m Matcher) *Selection { - return pushStack(s, winnow(s, m, false)) -} - -// FilterFunction reduces the set of matched elements to those that pass the function's test. -// It returns a new Selection object for this subset of elements. -func (s *Selection) FilterFunction(f func(int, *Selection) bool) *Selection { - return pushStack(s, winnowFunction(s, f, true)) -} - -// NotFunction removes elements from the Selection that pass the function's test. -// It returns a new Selection object with the matching elements removed. -func (s *Selection) NotFunction(f func(int, *Selection) bool) *Selection { - return pushStack(s, winnowFunction(s, f, false)) -} - -// FilterNodes reduces the set of matched elements to those that match the specified nodes. -// It returns a new Selection object for this subset of elements. -func (s *Selection) FilterNodes(nodes ...*html.Node) *Selection { - return pushStack(s, winnowNodes(s, nodes, true)) -} - -// NotNodes removes elements from the Selection that match the specified nodes. -// It returns a new Selection object with the matching elements removed. -func (s *Selection) NotNodes(nodes ...*html.Node) *Selection { - return pushStack(s, winnowNodes(s, nodes, false)) -} - -// FilterSelection reduces the set of matched elements to those that match a -// node in the specified Selection object. -// It returns a new Selection object for this subset of elements. -func (s *Selection) FilterSelection(sel *Selection) *Selection { - if sel == nil { - return pushStack(s, winnowNodes(s, nil, true)) - } - return pushStack(s, winnowNodes(s, sel.Nodes, true)) -} - -// NotSelection removes elements from the Selection that match a node in the specified -// Selection object. It returns a new Selection object with the matching elements removed. -func (s *Selection) NotSelection(sel *Selection) *Selection { - if sel == nil { - return pushStack(s, winnowNodes(s, nil, false)) - } - return pushStack(s, winnowNodes(s, sel.Nodes, false)) -} - -// Intersection is an alias for FilterSelection. -func (s *Selection) Intersection(sel *Selection) *Selection { - return s.FilterSelection(sel) -} - -// Has reduces the set of matched elements to those that have a descendant -// that matches the selector. -// It returns a new Selection object with the matching elements. -func (s *Selection) Has(selector string) *Selection { - return s.HasSelection(s.document.Find(selector)) -} - -// HasMatcher reduces the set of matched elements to those that have a descendant -// that matches the matcher. -// It returns a new Selection object with the matching elements. -func (s *Selection) HasMatcher(m Matcher) *Selection { - return s.HasSelection(s.document.FindMatcher(m)) -} - -// HasNodes reduces the set of matched elements to those that have a -// descendant that matches one of the nodes. -// It returns a new Selection object with the matching elements. -func (s *Selection) HasNodes(nodes ...*html.Node) *Selection { - return s.FilterFunction(func(_ int, sel *Selection) bool { - // Add all nodes that contain one of the specified nodes - for _, n := range nodes { - if sel.Contains(n) { - return true - } - } - return false - }) -} - -// HasSelection reduces the set of matched elements to those that have a -// descendant that matches one of the nodes of the specified Selection object. -// It returns a new Selection object with the matching elements. -func (s *Selection) HasSelection(sel *Selection) *Selection { - if sel == nil { - return s.HasNodes() - } - return s.HasNodes(sel.Nodes...) -} - -// End ends the most recent filtering operation in the current chain and -// returns the set of matched elements to its previous state. -func (s *Selection) End() *Selection { - if s.prevSel != nil { - return s.prevSel - } - return newEmptySelection(s.document) -} - -// Filter based on the matcher, and the indicator to keep (Filter) or -// to get rid of (Not) the matching elements. -func winnow(sel *Selection, m Matcher, keep bool) []*html.Node { - // Optimize if keep is requested - if keep { - return m.Filter(sel.Nodes) - } - // Use grep - return grep(sel, func(i int, s *Selection) bool { - return !m.Match(s.Get(0)) - }) -} - -// Filter based on an array of nodes, and the indicator to keep (Filter) or -// to get rid of (Not) the matching elements. -func winnowNodes(sel *Selection, nodes []*html.Node, keep bool) []*html.Node { - if len(nodes)+len(sel.Nodes) < minNodesForSet { - return grep(sel, func(i int, s *Selection) bool { - return isInSlice(nodes, s.Get(0)) == keep - }) - } - - set := make(map[*html.Node]bool) - for _, n := range nodes { - set[n] = true - } - return grep(sel, func(i int, s *Selection) bool { - return set[s.Get(0)] == keep - }) -} - -// Filter based on a function test, and the indicator to keep (Filter) or -// to get rid of (Not) the matching elements. -func winnowFunction(sel *Selection, f func(int, *Selection) bool, keep bool) []*html.Node { - return grep(sel, func(i int, s *Selection) bool { - return f(i, s) == keep - }) -} diff --git a/vendor/github.com/PuerkitoBio/goquery/go.mod b/vendor/github.com/PuerkitoBio/goquery/go.mod deleted file mode 100644 index 95826ad..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/go.mod +++ /dev/null @@ -1,8 +0,0 @@ -module github.com/PuerkitoBio/goquery - -require ( - github.com/andybalholm/cascadia v1.1.0 - golang.org/x/net v0.0.0-20200202094626-16171245cfb2 -) - -go 1.13 diff --git a/vendor/github.com/PuerkitoBio/goquery/go.sum b/vendor/github.com/PuerkitoBio/goquery/go.sum deleted file mode 100644 index bc79107..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/go.sum +++ /dev/null @@ -1,8 +0,0 @@ -github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= -github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/vendor/github.com/PuerkitoBio/goquery/iteration.go b/vendor/github.com/PuerkitoBio/goquery/iteration.go deleted file mode 100644 index e246f2e..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/iteration.go +++ /dev/null @@ -1,39 +0,0 @@ -package goquery - -// Each iterates over a Selection object, executing a function for each -// matched element. It returns the current Selection object. The function -// f is called for each element in the selection with the index of the -// element in that selection starting at 0, and a *Selection that contains -// only that element. -func (s *Selection) Each(f func(int, *Selection)) *Selection { - for i, n := range s.Nodes { - f(i, newSingleSelection(n, s.document)) - } - return s -} - -// EachWithBreak iterates over a Selection object, executing a function for each -// matched element. It is identical to Each except that it is possible to break -// out of the loop by returning false in the callback function. It returns the -// current Selection object. -func (s *Selection) EachWithBreak(f func(int, *Selection) bool) *Selection { - for i, n := range s.Nodes { - if !f(i, newSingleSelection(n, s.document)) { - return s - } - } - return s -} - -// Map passes each element in the current matched set through a function, -// producing a slice of string holding the returned values. The function -// f is called for each element in the selection with the index of the -// element in that selection starting at 0, and a *Selection that contains -// only that element. -func (s *Selection) Map(f func(int, *Selection) string) (result []string) { - for i, n := range s.Nodes { - result = append(result, f(i, newSingleSelection(n, s.document))) - } - - return result -} diff --git a/vendor/github.com/PuerkitoBio/goquery/manipulation.go b/vendor/github.com/PuerkitoBio/goquery/manipulation.go deleted file mode 100644 index 35febf1..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/manipulation.go +++ /dev/null @@ -1,679 +0,0 @@ -package goquery - -import ( - "strings" - - "golang.org/x/net/html" -) - -// After applies the selector from the root document and inserts the matched elements -// after the elements in the set of matched elements. -// -// If one of the matched elements in the selection is not currently in the -// document, it's impossible to insert nodes after it, so it will be ignored. -// -// This follows the same rules as Selection.Append. -func (s *Selection) After(selector string) *Selection { - return s.AfterMatcher(compileMatcher(selector)) -} - -// AfterMatcher applies the matcher from the root document and inserts the matched elements -// after the elements in the set of matched elements. -// -// If one of the matched elements in the selection is not currently in the -// document, it's impossible to insert nodes after it, so it will be ignored. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AfterMatcher(m Matcher) *Selection { - return s.AfterNodes(m.MatchAll(s.document.rootNode)...) -} - -// AfterSelection inserts the elements in the selection after each element in the set of matched -// elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AfterSelection(sel *Selection) *Selection { - return s.AfterNodes(sel.Nodes...) -} - -// AfterHtml parses the html and inserts it after the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AfterHtml(htmlStr string) *Selection { - return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { - nextSibling := node.NextSibling - for _, n := range nodes { - if node.Parent != nil { - node.Parent.InsertBefore(n, nextSibling) - } - } - }) -} - -// AfterNodes inserts the nodes after each element in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AfterNodes(ns ...*html.Node) *Selection { - return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) { - if sn.Parent != nil { - sn.Parent.InsertBefore(n, sn.NextSibling) - } - }) -} - -// Append appends the elements specified by the selector to the end of each element -// in the set of matched elements, following those rules: -// -// 1) The selector is applied to the root document. -// -// 2) Elements that are part of the document will be moved to the new location. -// -// 3) If there are multiple locations to append to, cloned nodes will be -// appended to all target locations except the last one, which will be moved -// as noted in (2). -func (s *Selection) Append(selector string) *Selection { - return s.AppendMatcher(compileMatcher(selector)) -} - -// AppendMatcher appends the elements specified by the matcher to the end of each element -// in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AppendMatcher(m Matcher) *Selection { - return s.AppendNodes(m.MatchAll(s.document.rootNode)...) -} - -// AppendSelection appends the elements in the selection to the end of each element -// in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AppendSelection(sel *Selection) *Selection { - return s.AppendNodes(sel.Nodes...) -} - -// AppendHtml parses the html and appends it to the set of matched elements. -func (s *Selection) AppendHtml(htmlStr string) *Selection { - return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { - for _, n := range nodes { - node.AppendChild(n) - } - }) -} - -// AppendNodes appends the specified nodes to each node in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) AppendNodes(ns ...*html.Node) *Selection { - return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) { - sn.AppendChild(n) - }) -} - -// Before inserts the matched elements before each element in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) Before(selector string) *Selection { - return s.BeforeMatcher(compileMatcher(selector)) -} - -// BeforeMatcher inserts the matched elements before each element in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) BeforeMatcher(m Matcher) *Selection { - return s.BeforeNodes(m.MatchAll(s.document.rootNode)...) -} - -// BeforeSelection inserts the elements in the selection before each element in the set of matched -// elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) BeforeSelection(sel *Selection) *Selection { - return s.BeforeNodes(sel.Nodes...) -} - -// BeforeHtml parses the html and inserts it before the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) BeforeHtml(htmlStr string) *Selection { - return s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { - for _, n := range nodes { - if node.Parent != nil { - node.Parent.InsertBefore(n, node) - } - } - }) -} - -// BeforeNodes inserts the nodes before each element in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) BeforeNodes(ns ...*html.Node) *Selection { - return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) { - if sn.Parent != nil { - sn.Parent.InsertBefore(n, sn) - } - }) -} - -// Clone creates a deep copy of the set of matched nodes. The new nodes will not be -// attached to the document. -func (s *Selection) Clone() *Selection { - ns := newEmptySelection(s.document) - ns.Nodes = cloneNodes(s.Nodes) - return ns -} - -// Empty removes all children nodes from the set of matched elements. -// It returns the children nodes in a new Selection. -func (s *Selection) Empty() *Selection { - var nodes []*html.Node - - for _, n := range s.Nodes { - for c := n.FirstChild; c != nil; c = n.FirstChild { - n.RemoveChild(c) - nodes = append(nodes, c) - } - } - - return pushStack(s, nodes) -} - -// Prepend prepends the elements specified by the selector to each element in -// the set of matched elements, following the same rules as Append. -func (s *Selection) Prepend(selector string) *Selection { - return s.PrependMatcher(compileMatcher(selector)) -} - -// PrependMatcher prepends the elements specified by the matcher to each -// element in the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) PrependMatcher(m Matcher) *Selection { - return s.PrependNodes(m.MatchAll(s.document.rootNode)...) -} - -// PrependSelection prepends the elements in the selection to each element in -// the set of matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) PrependSelection(sel *Selection) *Selection { - return s.PrependNodes(sel.Nodes...) -} - -// PrependHtml parses the html and prepends it to the set of matched elements. -func (s *Selection) PrependHtml(htmlStr string) *Selection { - return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { - firstChild := node.FirstChild - for _, n := range nodes { - node.InsertBefore(n, firstChild) - } - }) -} - -// PrependNodes prepends the specified nodes to each node in the set of -// matched elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) PrependNodes(ns ...*html.Node) *Selection { - return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) { - // sn.FirstChild may be nil, in which case this functions like - // sn.AppendChild() - sn.InsertBefore(n, sn.FirstChild) - }) -} - -// Remove removes the set of matched elements from the document. -// It returns the same selection, now consisting of nodes not in the document. -func (s *Selection) Remove() *Selection { - for _, n := range s.Nodes { - if n.Parent != nil { - n.Parent.RemoveChild(n) - } - } - - return s -} - -// RemoveFiltered removes from the current set of matched elements those that -// match the selector filter. It returns the Selection of removed nodes. -// -// For example if the selection s contains "

", "

" and "

" -// and s.RemoveFiltered("h2") is called, only the "

" node is removed -// (and returned), while "

" and "

" are kept in the document. -func (s *Selection) RemoveFiltered(selector string) *Selection { - return s.RemoveMatcher(compileMatcher(selector)) -} - -// RemoveMatcher removes from the current set of matched elements those that -// match the Matcher filter. It returns the Selection of removed nodes. -// See RemoveFiltered for additional information. -func (s *Selection) RemoveMatcher(m Matcher) *Selection { - return s.FilterMatcher(m).Remove() -} - -// ReplaceWith replaces each element in the set of matched elements with the -// nodes matched by the given selector. -// It returns the removed elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWith(selector string) *Selection { - return s.ReplaceWithMatcher(compileMatcher(selector)) -} - -// ReplaceWithMatcher replaces each element in the set of matched elements with -// the nodes matched by the given Matcher. -// It returns the removed elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWithMatcher(m Matcher) *Selection { - return s.ReplaceWithNodes(m.MatchAll(s.document.rootNode)...) -} - -// ReplaceWithSelection replaces each element in the set of matched elements with -// the nodes from the given Selection. -// It returns the removed elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection { - return s.ReplaceWithNodes(sel.Nodes...) -} - -// ReplaceWithHtml replaces each element in the set of matched elements with -// the parsed HTML. -// It returns the removed elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWithHtml(htmlStr string) *Selection { - s.eachNodeHtml(htmlStr, true, func(node *html.Node, nodes []*html.Node) { - nextSibling := node.NextSibling - for _, n := range nodes { - if node.Parent != nil { - node.Parent.InsertBefore(n, nextSibling) - } - } - }) - return s.Remove() -} - -// ReplaceWithNodes replaces each element in the set of matched elements with -// the given nodes. -// It returns the removed elements. -// -// This follows the same rules as Selection.Append. -func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection { - s.AfterNodes(ns...) - return s.Remove() -} - -// SetHtml sets the html content of each element in the selection to -// specified html string. -func (s *Selection) SetHtml(htmlStr string) *Selection { - for _, context := range s.Nodes { - for c := context.FirstChild; c != nil; c = context.FirstChild { - context.RemoveChild(c) - } - } - return s.eachNodeHtml(htmlStr, false, func(node *html.Node, nodes []*html.Node) { - for _, n := range nodes { - node.AppendChild(n) - } - }) -} - -// SetText sets the content of each element in the selection to specified content. -// The provided text string is escaped. -func (s *Selection) SetText(text string) *Selection { - return s.SetHtml(html.EscapeString(text)) -} - -// Unwrap removes the parents of the set of matched elements, leaving the matched -// elements (and their siblings, if any) in their place. -// It returns the original selection. -func (s *Selection) Unwrap() *Selection { - s.Parent().Each(func(i int, ss *Selection) { - // For some reason, jquery allows unwrap to remove the element, so - // allowing it here too. Same for . Why it allows those elements to - // be unwrapped while not allowing body is a mystery to me. - if ss.Nodes[0].Data != "body" { - ss.ReplaceWithSelection(ss.Contents()) - } - }) - - return s -} - -// Wrap wraps each element in the set of matched elements inside the first -// element matched by the given selector. The matched child is cloned before -// being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) Wrap(selector string) *Selection { - return s.WrapMatcher(compileMatcher(selector)) -} - -// WrapMatcher wraps each element in the set of matched elements inside the -// first element matched by the given matcher. The matched child is cloned -// before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapMatcher(m Matcher) *Selection { - return s.wrapNodes(m.MatchAll(s.document.rootNode)...) -} - -// WrapSelection wraps each element in the set of matched elements inside the -// first element in the given Selection. The element is cloned before being -// inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapSelection(sel *Selection) *Selection { - return s.wrapNodes(sel.Nodes...) -} - -// WrapHtml wraps each element in the set of matched elements inside the inner- -// most child of the given HTML. -// -// It returns the original set of elements. -func (s *Selection) WrapHtml(htmlStr string) *Selection { - nodesMap := make(map[string][]*html.Node) - for _, context := range s.Nodes { - var parent *html.Node - if context.Parent != nil { - parent = context.Parent - } else { - parent = &html.Node{Type: html.ElementNode} - } - nodes, found := nodesMap[nodeName(parent)] - if !found { - nodes = parseHtmlWithContext(htmlStr, parent) - nodesMap[nodeName(parent)] = nodes - } - newSingleSelection(context, s.document).wrapAllNodes(cloneNodes(nodes)...) - } - return s -} - -// WrapNode wraps each element in the set of matched elements inside the inner- -// most child of the given node. The given node is copied before being inserted -// into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapNode(n *html.Node) *Selection { - return s.wrapNodes(n) -} - -func (s *Selection) wrapNodes(ns ...*html.Node) *Selection { - s.Each(func(i int, ss *Selection) { - ss.wrapAllNodes(ns...) - }) - - return s -} - -// WrapAll wraps a single HTML structure, matched by the given selector, around -// all elements in the set of matched elements. The matched child is cloned -// before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapAll(selector string) *Selection { - return s.WrapAllMatcher(compileMatcher(selector)) -} - -// WrapAllMatcher wraps a single HTML structure, matched by the given Matcher, -// around all elements in the set of matched elements. The matched child is -// cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapAllMatcher(m Matcher) *Selection { - return s.wrapAllNodes(m.MatchAll(s.document.rootNode)...) -} - -// WrapAllSelection wraps a single HTML structure, the first node of the given -// Selection, around all elements in the set of matched elements. The matched -// child is cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapAllSelection(sel *Selection) *Selection { - return s.wrapAllNodes(sel.Nodes...) -} - -// WrapAllHtml wraps the given HTML structure around all elements in the set of -// matched elements. The matched child is cloned before being inserted into the -// document. -// -// It returns the original set of elements. -func (s *Selection) WrapAllHtml(htmlStr string) *Selection { - var context *html.Node - var nodes []*html.Node - if len(s.Nodes) > 0 { - context = s.Nodes[0] - if context.Parent != nil { - nodes = parseHtmlWithContext(htmlStr, context) - } else { - nodes = parseHtml(htmlStr) - } - } - return s.wrapAllNodes(nodes...) -} - -func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection { - if len(ns) > 0 { - return s.WrapAllNode(ns[0]) - } - return s -} - -// WrapAllNode wraps the given node around the first element in the Selection, -// making all other nodes in the Selection children of the given node. The node -// is cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapAllNode(n *html.Node) *Selection { - if s.Size() == 0 { - return s - } - - wrap := cloneNode(n) - - first := s.Nodes[0] - if first.Parent != nil { - first.Parent.InsertBefore(wrap, first) - first.Parent.RemoveChild(first) - } - - for c := getFirstChildEl(wrap); c != nil; c = getFirstChildEl(wrap) { - wrap = c - } - - newSingleSelection(wrap, s.document).AppendSelection(s) - - return s -} - -// WrapInner wraps an HTML structure, matched by the given selector, around the -// content of element in the set of matched elements. The matched child is -// cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapInner(selector string) *Selection { - return s.WrapInnerMatcher(compileMatcher(selector)) -} - -// WrapInnerMatcher wraps an HTML structure, matched by the given selector, -// around the content of element in the set of matched elements. The matched -// child is cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapInnerMatcher(m Matcher) *Selection { - return s.wrapInnerNodes(m.MatchAll(s.document.rootNode)...) -} - -// WrapInnerSelection wraps an HTML structure, matched by the given selector, -// around the content of element in the set of matched elements. The matched -// child is cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapInnerSelection(sel *Selection) *Selection { - return s.wrapInnerNodes(sel.Nodes...) -} - -// WrapInnerHtml wraps an HTML structure, matched by the given selector, around -// the content of element in the set of matched elements. The matched child is -// cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapInnerHtml(htmlStr string) *Selection { - nodesMap := make(map[string][]*html.Node) - for _, context := range s.Nodes { - nodes, found := nodesMap[nodeName(context)] - if !found { - nodes = parseHtmlWithContext(htmlStr, context) - nodesMap[nodeName(context)] = nodes - } - newSingleSelection(context, s.document).wrapInnerNodes(cloneNodes(nodes)...) - } - return s -} - -// WrapInnerNode wraps an HTML structure, matched by the given selector, around -// the content of element in the set of matched elements. The matched child is -// cloned before being inserted into the document. -// -// It returns the original set of elements. -func (s *Selection) WrapInnerNode(n *html.Node) *Selection { - return s.wrapInnerNodes(n) -} - -func (s *Selection) wrapInnerNodes(ns ...*html.Node) *Selection { - if len(ns) == 0 { - return s - } - - s.Each(func(i int, s *Selection) { - contents := s.Contents() - - if contents.Size() > 0 { - contents.wrapAllNodes(ns...) - } else { - s.AppendNodes(cloneNode(ns[0])) - } - }) - - return s -} - -func parseHtml(h string) []*html.Node { - // Errors are only returned when the io.Reader returns any error besides - // EOF, but strings.Reader never will - nodes, err := html.ParseFragment(strings.NewReader(h), &html.Node{Type: html.ElementNode}) - if err != nil { - panic("goquery: failed to parse HTML: " + err.Error()) - } - return nodes -} - -func parseHtmlWithContext(h string, context *html.Node) []*html.Node { - // Errors are only returned when the io.Reader returns any error besides - // EOF, but strings.Reader never will - nodes, err := html.ParseFragment(strings.NewReader(h), context) - if err != nil { - panic("goquery: failed to parse HTML: " + err.Error()) - } - return nodes -} - -// Get the first child that is an ElementNode -func getFirstChildEl(n *html.Node) *html.Node { - c := n.FirstChild - for c != nil && c.Type != html.ElementNode { - c = c.NextSibling - } - return c -} - -// Deep copy a slice of nodes. -func cloneNodes(ns []*html.Node) []*html.Node { - cns := make([]*html.Node, 0, len(ns)) - - for _, n := range ns { - cns = append(cns, cloneNode(n)) - } - - return cns -} - -// Deep copy a node. The new node has clones of all the original node's -// children but none of its parents or siblings. -func cloneNode(n *html.Node) *html.Node { - nn := &html.Node{ - Type: n.Type, - DataAtom: n.DataAtom, - Data: n.Data, - Attr: make([]html.Attribute, len(n.Attr)), - } - - copy(nn.Attr, n.Attr) - for c := n.FirstChild; c != nil; c = c.NextSibling { - nn.AppendChild(cloneNode(c)) - } - - return nn -} - -func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool, - f func(sn *html.Node, n *html.Node)) *Selection { - - lasti := s.Size() - 1 - - // net.Html doesn't provide document fragments for insertion, so to get - // things in the correct order with After() and Prepend(), the callback - // needs to be called on the reverse of the nodes. - if reverse { - for i, j := 0, len(ns)-1; i < j; i, j = i+1, j-1 { - ns[i], ns[j] = ns[j], ns[i] - } - } - - for i, sn := range s.Nodes { - for _, n := range ns { - if i != lasti { - f(sn, cloneNode(n)) - } else { - if n.Parent != nil { - n.Parent.RemoveChild(n) - } - f(sn, n) - } - } - } - - return s -} - -// eachNodeHtml parses the given html string and inserts the resulting nodes in the dom with the mergeFn. -// The parsed nodes are inserted for each element of the selection. -// isParent can be used to indicate that the elements of the selection should be treated as the parent for the parsed html. -// A cache is used to avoid parsing the html multiple times should the elements of the selection result in the same context. -func (s *Selection) eachNodeHtml(htmlStr string, isParent bool, mergeFn func(n *html.Node, nodes []*html.Node)) *Selection { - // cache to avoid parsing the html for the same context multiple times - nodeCache := make(map[string][]*html.Node) - var context *html.Node - for _, n := range s.Nodes { - if isParent { - context = n.Parent - } else { - if n.Type != html.ElementNode { - continue - } - context = n - } - if context != nil { - nodes, found := nodeCache[nodeName(context)] - if !found { - nodes = parseHtmlWithContext(htmlStr, context) - nodeCache[nodeName(context)] = nodes - } - mergeFn(n, cloneNodes(nodes)) - } - } - return s -} diff --git a/vendor/github.com/PuerkitoBio/goquery/property.go b/vendor/github.com/PuerkitoBio/goquery/property.go deleted file mode 100644 index 411126d..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/property.go +++ /dev/null @@ -1,275 +0,0 @@ -package goquery - -import ( - "bytes" - "regexp" - "strings" - - "golang.org/x/net/html" -) - -var rxClassTrim = regexp.MustCompile("[\t\r\n]") - -// Attr gets the specified attribute's value for the first element in the -// Selection. To get the value for each element individually, use a looping -// construct such as Each or Map method. -func (s *Selection) Attr(attrName string) (val string, exists bool) { - if len(s.Nodes) == 0 { - return - } - return getAttributeValue(attrName, s.Nodes[0]) -} - -// AttrOr works like Attr but returns default value if attribute is not present. -func (s *Selection) AttrOr(attrName, defaultValue string) string { - if len(s.Nodes) == 0 { - return defaultValue - } - - val, exists := getAttributeValue(attrName, s.Nodes[0]) - if !exists { - return defaultValue - } - - return val -} - -// RemoveAttr removes the named attribute from each element in the set of matched elements. -func (s *Selection) RemoveAttr(attrName string) *Selection { - for _, n := range s.Nodes { - removeAttr(n, attrName) - } - - return s -} - -// SetAttr sets the given attribute on each element in the set of matched elements. -func (s *Selection) SetAttr(attrName, val string) *Selection { - for _, n := range s.Nodes { - attr := getAttributePtr(attrName, n) - if attr == nil { - n.Attr = append(n.Attr, html.Attribute{Key: attrName, Val: val}) - } else { - attr.Val = val - } - } - - return s -} - -// Text gets the combined text contents of each element in the set of matched -// elements, including their descendants. -func (s *Selection) Text() string { - var buf bytes.Buffer - - // Slightly optimized vs calling Each: no single selection object created - var f func(*html.Node) - f = func(n *html.Node) { - if n.Type == html.TextNode { - // Keep newlines and spaces, like jQuery - buf.WriteString(n.Data) - } - if n.FirstChild != nil { - for c := n.FirstChild; c != nil; c = c.NextSibling { - f(c) - } - } - } - for _, n := range s.Nodes { - f(n) - } - - return buf.String() -} - -// Size is an alias for Length. -func (s *Selection) Size() int { - return s.Length() -} - -// Length returns the number of elements in the Selection object. -func (s *Selection) Length() int { - return len(s.Nodes) -} - -// Html gets the HTML contents of the first element in the set of matched -// elements. It includes text and comment nodes. -func (s *Selection) Html() (ret string, e error) { - // Since there is no .innerHtml, the HTML content must be re-created from - // the nodes using html.Render. - var buf bytes.Buffer - - if len(s.Nodes) > 0 { - for c := s.Nodes[0].FirstChild; c != nil; c = c.NextSibling { - e = html.Render(&buf, c) - if e != nil { - return - } - } - ret = buf.String() - } - - return -} - -// AddClass adds the given class(es) to each element in the set of matched elements. -// Multiple class names can be specified, separated by a space or via multiple arguments. -func (s *Selection) AddClass(class ...string) *Selection { - classStr := strings.TrimSpace(strings.Join(class, " ")) - - if classStr == "" { - return s - } - - tcls := getClassesSlice(classStr) - for _, n := range s.Nodes { - curClasses, attr := getClassesAndAttr(n, true) - for _, newClass := range tcls { - if !strings.Contains(curClasses, " "+newClass+" ") { - curClasses += newClass + " " - } - } - - setClasses(n, attr, curClasses) - } - - return s -} - -// HasClass determines whether any of the matched elements are assigned the -// given class. -func (s *Selection) HasClass(class string) bool { - class = " " + class + " " - for _, n := range s.Nodes { - classes, _ := getClassesAndAttr(n, false) - if strings.Contains(classes, class) { - return true - } - } - return false -} - -// RemoveClass removes the given class(es) from each element in the set of matched elements. -// Multiple class names can be specified, separated by a space or via multiple arguments. -// If no class name is provided, all classes are removed. -func (s *Selection) RemoveClass(class ...string) *Selection { - var rclasses []string - - classStr := strings.TrimSpace(strings.Join(class, " ")) - remove := classStr == "" - - if !remove { - rclasses = getClassesSlice(classStr) - } - - for _, n := range s.Nodes { - if remove { - removeAttr(n, "class") - } else { - classes, attr := getClassesAndAttr(n, true) - for _, rcl := range rclasses { - classes = strings.Replace(classes, " "+rcl+" ", " ", -1) - } - - setClasses(n, attr, classes) - } - } - - return s -} - -// ToggleClass adds or removes the given class(es) for each element in the set of matched elements. -// Multiple class names can be specified, separated by a space or via multiple arguments. -func (s *Selection) ToggleClass(class ...string) *Selection { - classStr := strings.TrimSpace(strings.Join(class, " ")) - - if classStr == "" { - return s - } - - tcls := getClassesSlice(classStr) - - for _, n := range s.Nodes { - classes, attr := getClassesAndAttr(n, true) - for _, tcl := range tcls { - if strings.Contains(classes, " "+tcl+" ") { - classes = strings.Replace(classes, " "+tcl+" ", " ", -1) - } else { - classes += tcl + " " - } - } - - setClasses(n, attr, classes) - } - - return s -} - -func getAttributePtr(attrName string, n *html.Node) *html.Attribute { - if n == nil { - return nil - } - - for i, a := range n.Attr { - if a.Key == attrName { - return &n.Attr[i] - } - } - return nil -} - -// Private function to get the specified attribute's value from a node. -func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) { - if a := getAttributePtr(attrName, n); a != nil { - val = a.Val - exists = true - } - return -} - -// Get and normalize the "class" attribute from the node. -func getClassesAndAttr(n *html.Node, create bool) (classes string, attr *html.Attribute) { - // Applies only to element nodes - if n.Type == html.ElementNode { - attr = getAttributePtr("class", n) - if attr == nil && create { - n.Attr = append(n.Attr, html.Attribute{ - Key: "class", - Val: "", - }) - attr = &n.Attr[len(n.Attr)-1] - } - } - - if attr == nil { - classes = " " - } else { - classes = rxClassTrim.ReplaceAllString(" "+attr.Val+" ", " ") - } - - return -} - -func getClassesSlice(classes string) []string { - return strings.Split(rxClassTrim.ReplaceAllString(" "+classes+" ", " "), " ") -} - -func removeAttr(n *html.Node, attrName string) { - for i, a := range n.Attr { - if a.Key == attrName { - n.Attr[i], n.Attr[len(n.Attr)-1], n.Attr = - n.Attr[len(n.Attr)-1], html.Attribute{}, n.Attr[:len(n.Attr)-1] - return - } - } -} - -func setClasses(n *html.Node, attr *html.Attribute, classes string) { - classes = strings.TrimSpace(classes) - if classes == "" { - removeAttr(n, "class") - return - } - - attr.Val = classes -} diff --git a/vendor/github.com/PuerkitoBio/goquery/query.go b/vendor/github.com/PuerkitoBio/goquery/query.go deleted file mode 100644 index fe86bf0..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/query.go +++ /dev/null @@ -1,49 +0,0 @@ -package goquery - -import "golang.org/x/net/html" - -// Is checks the current matched set of elements against a selector and -// returns true if at least one of these elements matches. -func (s *Selection) Is(selector string) bool { - return s.IsMatcher(compileMatcher(selector)) -} - -// IsMatcher checks the current matched set of elements against a matcher and -// returns true if at least one of these elements matches. -func (s *Selection) IsMatcher(m Matcher) bool { - if len(s.Nodes) > 0 { - if len(s.Nodes) == 1 { - return m.Match(s.Nodes[0]) - } - return len(m.Filter(s.Nodes)) > 0 - } - - return false -} - -// IsFunction checks the current matched set of elements against a predicate and -// returns true if at least one of these elements matches. -func (s *Selection) IsFunction(f func(int, *Selection) bool) bool { - return s.FilterFunction(f).Length() > 0 -} - -// IsSelection checks the current matched set of elements against a Selection object -// and returns true if at least one of these elements matches. -func (s *Selection) IsSelection(sel *Selection) bool { - return s.FilterSelection(sel).Length() > 0 -} - -// IsNodes checks the current matched set of elements against the specified nodes -// and returns true if at least one of these elements matches. -func (s *Selection) IsNodes(nodes ...*html.Node) bool { - return s.FilterNodes(nodes...).Length() > 0 -} - -// Contains returns true if the specified Node is within, -// at any depth, one of the nodes in the Selection object. -// It is NOT inclusive, to behave like jQuery's implementation, and -// unlike Javascript's .contains, so if the contained -// node is itself in the selection, it returns false. -func (s *Selection) Contains(n *html.Node) bool { - return sliceContains(s.Nodes, n) -} diff --git a/vendor/github.com/PuerkitoBio/goquery/traversal.go b/vendor/github.com/PuerkitoBio/goquery/traversal.go deleted file mode 100644 index 5fa5315..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/traversal.go +++ /dev/null @@ -1,698 +0,0 @@ -package goquery - -import "golang.org/x/net/html" - -type siblingType int - -// Sibling type, used internally when iterating over children at the same -// level (siblings) to specify which nodes are requested. -const ( - siblingPrevUntil siblingType = iota - 3 - siblingPrevAll - siblingPrev - siblingAll - siblingNext - siblingNextAll - siblingNextUntil - siblingAllIncludingNonElements -) - -// Find gets the descendants of each element in the current set of matched -// elements, filtered by a selector. It returns a new Selection object -// containing these matched elements. -func (s *Selection) Find(selector string) *Selection { - return pushStack(s, findWithMatcher(s.Nodes, compileMatcher(selector))) -} - -// FindMatcher gets the descendants of each element in the current set of matched -// elements, filtered by the matcher. It returns a new Selection object -// containing these matched elements. -func (s *Selection) FindMatcher(m Matcher) *Selection { - return pushStack(s, findWithMatcher(s.Nodes, m)) -} - -// FindSelection gets the descendants of each element in the current -// Selection, filtered by a Selection. It returns a new Selection object -// containing these matched elements. -func (s *Selection) FindSelection(sel *Selection) *Selection { - if sel == nil { - return pushStack(s, nil) - } - return s.FindNodes(sel.Nodes...) -} - -// FindNodes gets the descendants of each element in the current -// Selection, filtered by some nodes. It returns a new Selection object -// containing these matched elements. -func (s *Selection) FindNodes(nodes ...*html.Node) *Selection { - return pushStack(s, mapNodes(nodes, func(i int, n *html.Node) []*html.Node { - if sliceContains(s.Nodes, n) { - return []*html.Node{n} - } - return nil - })) -} - -// Contents gets the children of each element in the Selection, -// including text and comment nodes. It returns a new Selection object -// containing these elements. -func (s *Selection) Contents() *Selection { - return pushStack(s, getChildrenNodes(s.Nodes, siblingAllIncludingNonElements)) -} - -// ContentsFiltered gets the children of each element in the Selection, -// filtered by the specified selector. It returns a new Selection -// object containing these elements. Since selectors only act on Element nodes, -// this function is an alias to ChildrenFiltered unless the selector is empty, -// in which case it is an alias to Contents. -func (s *Selection) ContentsFiltered(selector string) *Selection { - if selector != "" { - return s.ChildrenFiltered(selector) - } - return s.Contents() -} - -// ContentsMatcher gets the children of each element in the Selection, -// filtered by the specified matcher. It returns a new Selection -// object containing these elements. Since matchers only act on Element nodes, -// this function is an alias to ChildrenMatcher. -func (s *Selection) ContentsMatcher(m Matcher) *Selection { - return s.ChildrenMatcher(m) -} - -// Children gets the child elements of each element in the Selection. -// It returns a new Selection object containing these elements. -func (s *Selection) Children() *Selection { - return pushStack(s, getChildrenNodes(s.Nodes, siblingAll)) -} - -// ChildrenFiltered gets the child elements of each element in the Selection, -// filtered by the specified selector. It returns a new -// Selection object containing these elements. -func (s *Selection) ChildrenFiltered(selector string) *Selection { - return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), compileMatcher(selector)) -} - -// ChildrenMatcher gets the child elements of each element in the Selection, -// filtered by the specified matcher. It returns a new -// Selection object containing these elements. -func (s *Selection) ChildrenMatcher(m Matcher) *Selection { - return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), m) -} - -// Parent gets the parent of each element in the Selection. It returns a -// new Selection object containing the matched elements. -func (s *Selection) Parent() *Selection { - return pushStack(s, getParentNodes(s.Nodes)) -} - -// ParentFiltered gets the parent of each element in the Selection filtered by a -// selector. It returns a new Selection object containing the matched elements. -func (s *Selection) ParentFiltered(selector string) *Selection { - return filterAndPush(s, getParentNodes(s.Nodes), compileMatcher(selector)) -} - -// ParentMatcher gets the parent of each element in the Selection filtered by a -// matcher. It returns a new Selection object containing the matched elements. -func (s *Selection) ParentMatcher(m Matcher) *Selection { - return filterAndPush(s, getParentNodes(s.Nodes), m) -} - -// Closest gets the first element that matches the selector by testing the -// element itself and traversing up through its ancestors in the DOM tree. -func (s *Selection) Closest(selector string) *Selection { - cs := compileMatcher(selector) - return s.ClosestMatcher(cs) -} - -// ClosestMatcher gets the first element that matches the matcher by testing the -// element itself and traversing up through its ancestors in the DOM tree. -func (s *Selection) ClosestMatcher(m Matcher) *Selection { - return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node { - // For each node in the selection, test the node itself, then each parent - // until a match is found. - for ; n != nil; n = n.Parent { - if m.Match(n) { - return []*html.Node{n} - } - } - return nil - })) -} - -// ClosestNodes gets the first element that matches one of the nodes by testing the -// element itself and traversing up through its ancestors in the DOM tree. -func (s *Selection) ClosestNodes(nodes ...*html.Node) *Selection { - set := make(map[*html.Node]bool) - for _, n := range nodes { - set[n] = true - } - return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node { - // For each node in the selection, test the node itself, then each parent - // until a match is found. - for ; n != nil; n = n.Parent { - if set[n] { - return []*html.Node{n} - } - } - return nil - })) -} - -// ClosestSelection gets the first element that matches one of the nodes in the -// Selection by testing the element itself and traversing up through its ancestors -// in the DOM tree. -func (s *Selection) ClosestSelection(sel *Selection) *Selection { - if sel == nil { - return pushStack(s, nil) - } - return s.ClosestNodes(sel.Nodes...) -} - -// Parents gets the ancestors of each element in the current Selection. It -// returns a new Selection object with the matched elements. -func (s *Selection) Parents() *Selection { - return pushStack(s, getParentsNodes(s.Nodes, nil, nil)) -} - -// ParentsFiltered gets the ancestors of each element in the current -// Selection. It returns a new Selection object with the matched elements. -func (s *Selection) ParentsFiltered(selector string) *Selection { - return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), compileMatcher(selector)) -} - -// ParentsMatcher gets the ancestors of each element in the current -// Selection. It returns a new Selection object with the matched elements. -func (s *Selection) ParentsMatcher(m Matcher) *Selection { - return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), m) -} - -// ParentsUntil gets the ancestors of each element in the Selection, up to but -// not including the element matched by the selector. It returns a new Selection -// object containing the matched elements. -func (s *Selection) ParentsUntil(selector string) *Selection { - return pushStack(s, getParentsNodes(s.Nodes, compileMatcher(selector), nil)) -} - -// ParentsUntilMatcher gets the ancestors of each element in the Selection, up to but -// not including the element matched by the matcher. It returns a new Selection -// object containing the matched elements. -func (s *Selection) ParentsUntilMatcher(m Matcher) *Selection { - return pushStack(s, getParentsNodes(s.Nodes, m, nil)) -} - -// ParentsUntilSelection gets the ancestors of each element in the Selection, -// up to but not including the elements in the specified Selection. It returns a -// new Selection object containing the matched elements. -func (s *Selection) ParentsUntilSelection(sel *Selection) *Selection { - if sel == nil { - return s.Parents() - } - return s.ParentsUntilNodes(sel.Nodes...) -} - -// ParentsUntilNodes gets the ancestors of each element in the Selection, -// up to but not including the specified nodes. It returns a -// new Selection object containing the matched elements. -func (s *Selection) ParentsUntilNodes(nodes ...*html.Node) *Selection { - return pushStack(s, getParentsNodes(s.Nodes, nil, nodes)) -} - -// ParentsFilteredUntil is like ParentsUntil, with the option to filter the -// results based on a selector string. It returns a new Selection -// object containing the matched elements. -func (s *Selection) ParentsFilteredUntil(filterSelector, untilSelector string) *Selection { - return filterAndPush(s, getParentsNodes(s.Nodes, compileMatcher(untilSelector), nil), compileMatcher(filterSelector)) -} - -// ParentsFilteredUntilMatcher is like ParentsUntilMatcher, with the option to filter the -// results based on a matcher. It returns a new Selection object containing the matched elements. -func (s *Selection) ParentsFilteredUntilMatcher(filter, until Matcher) *Selection { - return filterAndPush(s, getParentsNodes(s.Nodes, until, nil), filter) -} - -// ParentsFilteredUntilSelection is like ParentsUntilSelection, with the -// option to filter the results based on a selector string. It returns a new -// Selection object containing the matched elements. -func (s *Selection) ParentsFilteredUntilSelection(filterSelector string, sel *Selection) *Selection { - return s.ParentsMatcherUntilSelection(compileMatcher(filterSelector), sel) -} - -// ParentsMatcherUntilSelection is like ParentsUntilSelection, with the -// option to filter the results based on a matcher. It returns a new -// Selection object containing the matched elements. -func (s *Selection) ParentsMatcherUntilSelection(filter Matcher, sel *Selection) *Selection { - if sel == nil { - return s.ParentsMatcher(filter) - } - return s.ParentsMatcherUntilNodes(filter, sel.Nodes...) -} - -// ParentsFilteredUntilNodes is like ParentsUntilNodes, with the -// option to filter the results based on a selector string. It returns a new -// Selection object containing the matched elements. -func (s *Selection) ParentsFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection { - return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), compileMatcher(filterSelector)) -} - -// ParentsMatcherUntilNodes is like ParentsUntilNodes, with the -// option to filter the results based on a matcher. It returns a new -// Selection object containing the matched elements. -func (s *Selection) ParentsMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection { - return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), filter) -} - -// Siblings gets the siblings of each element in the Selection. It returns -// a new Selection object containing the matched elements. -func (s *Selection) Siblings() *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil)) -} - -// SiblingsFiltered gets the siblings of each element in the Selection -// filtered by a selector. It returns a new Selection object containing the -// matched elements. -func (s *Selection) SiblingsFiltered(selector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), compileMatcher(selector)) -} - -// SiblingsMatcher gets the siblings of each element in the Selection -// filtered by a matcher. It returns a new Selection object containing the -// matched elements. -func (s *Selection) SiblingsMatcher(m Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), m) -} - -// Next gets the immediately following sibling of each element in the -// Selection. It returns a new Selection object containing the matched elements. -func (s *Selection) Next() *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil)) -} - -// NextFiltered gets the immediately following sibling of each element in the -// Selection filtered by a selector. It returns a new Selection object -// containing the matched elements. -func (s *Selection) NextFiltered(selector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), compileMatcher(selector)) -} - -// NextMatcher gets the immediately following sibling of each element in the -// Selection filtered by a matcher. It returns a new Selection object -// containing the matched elements. -func (s *Selection) NextMatcher(m Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), m) -} - -// NextAll gets all the following siblings of each element in the -// Selection. It returns a new Selection object containing the matched elements. -func (s *Selection) NextAll() *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil)) -} - -// NextAllFiltered gets all the following siblings of each element in the -// Selection filtered by a selector. It returns a new Selection object -// containing the matched elements. -func (s *Selection) NextAllFiltered(selector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), compileMatcher(selector)) -} - -// NextAllMatcher gets all the following siblings of each element in the -// Selection filtered by a matcher. It returns a new Selection object -// containing the matched elements. -func (s *Selection) NextAllMatcher(m Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), m) -} - -// Prev gets the immediately preceding sibling of each element in the -// Selection. It returns a new Selection object containing the matched elements. -func (s *Selection) Prev() *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil)) -} - -// PrevFiltered gets the immediately preceding sibling of each element in the -// Selection filtered by a selector. It returns a new Selection object -// containing the matched elements. -func (s *Selection) PrevFiltered(selector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil), compileMatcher(selector)) -} - -// PrevMatcher gets the immediately preceding sibling of each element in the -// Selection filtered by a matcher. It returns a new Selection object -// containing the matched elements. -func (s *Selection) PrevMatcher(m Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil), m) -} - -// PrevAll gets all the preceding siblings of each element in the -// Selection. It returns a new Selection object containing the matched elements. -func (s *Selection) PrevAll() *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil)) -} - -// PrevAllFiltered gets all the preceding siblings of each element in the -// Selection filtered by a selector. It returns a new Selection object -// containing the matched elements. -func (s *Selection) PrevAllFiltered(selector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil), compileMatcher(selector)) -} - -// PrevAllMatcher gets all the preceding siblings of each element in the -// Selection filtered by a matcher. It returns a new Selection object -// containing the matched elements. -func (s *Selection) PrevAllMatcher(m Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil), m) -} - -// NextUntil gets all following siblings of each element up to but not -// including the element matched by the selector. It returns a new Selection -// object containing the matched elements. -func (s *Selection) NextUntil(selector string) *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil, - compileMatcher(selector), nil)) -} - -// NextUntilMatcher gets all following siblings of each element up to but not -// including the element matched by the matcher. It returns a new Selection -// object containing the matched elements. -func (s *Selection) NextUntilMatcher(m Matcher) *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil, - m, nil)) -} - -// NextUntilSelection gets all following siblings of each element up to but not -// including the element matched by the Selection. It returns a new Selection -// object containing the matched elements. -func (s *Selection) NextUntilSelection(sel *Selection) *Selection { - if sel == nil { - return s.NextAll() - } - return s.NextUntilNodes(sel.Nodes...) -} - -// NextUntilNodes gets all following siblings of each element up to but not -// including the element matched by the nodes. It returns a new Selection -// object containing the matched elements. -func (s *Selection) NextUntilNodes(nodes ...*html.Node) *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil, - nil, nodes)) -} - -// PrevUntil gets all preceding siblings of each element up to but not -// including the element matched by the selector. It returns a new Selection -// object containing the matched elements. -func (s *Selection) PrevUntil(selector string) *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - compileMatcher(selector), nil)) -} - -// PrevUntilMatcher gets all preceding siblings of each element up to but not -// including the element matched by the matcher. It returns a new Selection -// object containing the matched elements. -func (s *Selection) PrevUntilMatcher(m Matcher) *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - m, nil)) -} - -// PrevUntilSelection gets all preceding siblings of each element up to but not -// including the element matched by the Selection. It returns a new Selection -// object containing the matched elements. -func (s *Selection) PrevUntilSelection(sel *Selection) *Selection { - if sel == nil { - return s.PrevAll() - } - return s.PrevUntilNodes(sel.Nodes...) -} - -// PrevUntilNodes gets all preceding siblings of each element up to but not -// including the element matched by the nodes. It returns a new Selection -// object containing the matched elements. -func (s *Selection) PrevUntilNodes(nodes ...*html.Node) *Selection { - return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - nil, nodes)) -} - -// NextFilteredUntil is like NextUntil, with the option to filter -// the results based on a selector string. -// It returns a new Selection object containing the matched elements. -func (s *Selection) NextFilteredUntil(filterSelector, untilSelector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil, - compileMatcher(untilSelector), nil), compileMatcher(filterSelector)) -} - -// NextFilteredUntilMatcher is like NextUntilMatcher, with the option to filter -// the results based on a matcher. -// It returns a new Selection object containing the matched elements. -func (s *Selection) NextFilteredUntilMatcher(filter, until Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil, - until, nil), filter) -} - -// NextFilteredUntilSelection is like NextUntilSelection, with the -// option to filter the results based on a selector string. It returns a new -// Selection object containing the matched elements. -func (s *Selection) NextFilteredUntilSelection(filterSelector string, sel *Selection) *Selection { - return s.NextMatcherUntilSelection(compileMatcher(filterSelector), sel) -} - -// NextMatcherUntilSelection is like NextUntilSelection, with the -// option to filter the results based on a matcher. It returns a new -// Selection object containing the matched elements. -func (s *Selection) NextMatcherUntilSelection(filter Matcher, sel *Selection) *Selection { - if sel == nil { - return s.NextMatcher(filter) - } - return s.NextMatcherUntilNodes(filter, sel.Nodes...) -} - -// NextFilteredUntilNodes is like NextUntilNodes, with the -// option to filter the results based on a selector string. It returns a new -// Selection object containing the matched elements. -func (s *Selection) NextFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil, - nil, nodes), compileMatcher(filterSelector)) -} - -// NextMatcherUntilNodes is like NextUntilNodes, with the -// option to filter the results based on a matcher. It returns a new -// Selection object containing the matched elements. -func (s *Selection) NextMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil, - nil, nodes), filter) -} - -// PrevFilteredUntil is like PrevUntil, with the option to filter -// the results based on a selector string. -// It returns a new Selection object containing the matched elements. -func (s *Selection) PrevFilteredUntil(filterSelector, untilSelector string) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - compileMatcher(untilSelector), nil), compileMatcher(filterSelector)) -} - -// PrevFilteredUntilMatcher is like PrevUntilMatcher, with the option to filter -// the results based on a matcher. -// It returns a new Selection object containing the matched elements. -func (s *Selection) PrevFilteredUntilMatcher(filter, until Matcher) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - until, nil), filter) -} - -// PrevFilteredUntilSelection is like PrevUntilSelection, with the -// option to filter the results based on a selector string. It returns a new -// Selection object containing the matched elements. -func (s *Selection) PrevFilteredUntilSelection(filterSelector string, sel *Selection) *Selection { - return s.PrevMatcherUntilSelection(compileMatcher(filterSelector), sel) -} - -// PrevMatcherUntilSelection is like PrevUntilSelection, with the -// option to filter the results based on a matcher. It returns a new -// Selection object containing the matched elements. -func (s *Selection) PrevMatcherUntilSelection(filter Matcher, sel *Selection) *Selection { - if sel == nil { - return s.PrevMatcher(filter) - } - return s.PrevMatcherUntilNodes(filter, sel.Nodes...) -} - -// PrevFilteredUntilNodes is like PrevUntilNodes, with the -// option to filter the results based on a selector string. It returns a new -// Selection object containing the matched elements. -func (s *Selection) PrevFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - nil, nodes), compileMatcher(filterSelector)) -} - -// PrevMatcherUntilNodes is like PrevUntilNodes, with the -// option to filter the results based on a matcher. It returns a new -// Selection object containing the matched elements. -func (s *Selection) PrevMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection { - return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil, - nil, nodes), filter) -} - -// Filter and push filters the nodes based on a matcher, and pushes the results -// on the stack, with the srcSel as previous selection. -func filterAndPush(srcSel *Selection, nodes []*html.Node, m Matcher) *Selection { - // Create a temporary Selection with the specified nodes to filter using winnow - sel := &Selection{nodes, srcSel.document, nil} - // Filter based on matcher and push on stack - return pushStack(srcSel, winnow(sel, m, true)) -} - -// Internal implementation of Find that return raw nodes. -func findWithMatcher(nodes []*html.Node, m Matcher) []*html.Node { - // Map nodes to find the matches within the children of each node - return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) { - // Go down one level, becausejQuery's Find selects only within descendants - for c := n.FirstChild; c != nil; c = c.NextSibling { - if c.Type == html.ElementNode { - result = append(result, m.MatchAll(c)...) - } - } - return - }) -} - -// Internal implementation to get all parent nodes, stopping at the specified -// node (or nil if no stop). -func getParentsNodes(nodes []*html.Node, stopm Matcher, stopNodes []*html.Node) []*html.Node { - return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) { - for p := n.Parent; p != nil; p = p.Parent { - sel := newSingleSelection(p, nil) - if stopm != nil { - if sel.IsMatcher(stopm) { - break - } - } else if len(stopNodes) > 0 { - if sel.IsNodes(stopNodes...) { - break - } - } - if p.Type == html.ElementNode { - result = append(result, p) - } - } - return - }) -} - -// Internal implementation of sibling nodes that return a raw slice of matches. -func getSiblingNodes(nodes []*html.Node, st siblingType, untilm Matcher, untilNodes []*html.Node) []*html.Node { - var f func(*html.Node) bool - - // If the requested siblings are ...Until, create the test function to - // determine if the until condition is reached (returns true if it is) - if st == siblingNextUntil || st == siblingPrevUntil { - f = func(n *html.Node) bool { - if untilm != nil { - // Matcher-based condition - sel := newSingleSelection(n, nil) - return sel.IsMatcher(untilm) - } else if len(untilNodes) > 0 { - // Nodes-based condition - sel := newSingleSelection(n, nil) - return sel.IsNodes(untilNodes...) - } - return false - } - } - - return mapNodes(nodes, func(i int, n *html.Node) []*html.Node { - return getChildrenWithSiblingType(n.Parent, st, n, f) - }) -} - -// Gets the children nodes of each node in the specified slice of nodes, -// based on the sibling type request. -func getChildrenNodes(nodes []*html.Node, st siblingType) []*html.Node { - return mapNodes(nodes, func(i int, n *html.Node) []*html.Node { - return getChildrenWithSiblingType(n, st, nil, nil) - }) -} - -// Gets the children of the specified parent, based on the requested sibling -// type, skipping a specified node if required. -func getChildrenWithSiblingType(parent *html.Node, st siblingType, skipNode *html.Node, - untilFunc func(*html.Node) bool) (result []*html.Node) { - - // Create the iterator function - var iter = func(cur *html.Node) (ret *html.Node) { - // Based on the sibling type requested, iterate the right way - for { - switch st { - case siblingAll, siblingAllIncludingNonElements: - if cur == nil { - // First iteration, start with first child of parent - // Skip node if required - if ret = parent.FirstChild; ret == skipNode && skipNode != nil { - ret = skipNode.NextSibling - } - } else { - // Skip node if required - if ret = cur.NextSibling; ret == skipNode && skipNode != nil { - ret = skipNode.NextSibling - } - } - case siblingPrev, siblingPrevAll, siblingPrevUntil: - if cur == nil { - // Start with previous sibling of the skip node - ret = skipNode.PrevSibling - } else { - ret = cur.PrevSibling - } - case siblingNext, siblingNextAll, siblingNextUntil: - if cur == nil { - // Start with next sibling of the skip node - ret = skipNode.NextSibling - } else { - ret = cur.NextSibling - } - default: - panic("Invalid sibling type.") - } - if ret == nil || ret.Type == html.ElementNode || st == siblingAllIncludingNonElements { - return - } - // Not a valid node, try again from this one - cur = ret - } - } - - for c := iter(nil); c != nil; c = iter(c) { - // If this is an ...Until case, test before append (returns true - // if the until condition is reached) - if st == siblingNextUntil || st == siblingPrevUntil { - if untilFunc(c) { - return - } - } - result = append(result, c) - if st == siblingNext || st == siblingPrev { - // Only one node was requested (immediate next or previous), so exit - return - } - } - return -} - -// Internal implementation of parent nodes that return a raw slice of Nodes. -func getParentNodes(nodes []*html.Node) []*html.Node { - return mapNodes(nodes, func(i int, n *html.Node) []*html.Node { - if n.Parent != nil && n.Parent.Type == html.ElementNode { - return []*html.Node{n.Parent} - } - return nil - }) -} - -// Internal map function used by many traversing methods. Takes the source nodes -// to iterate on and the mapping function that returns an array of nodes. -// Returns an array of nodes mapped by calling the callback function once for -// each node in the source nodes. -func mapNodes(nodes []*html.Node, f func(int, *html.Node) []*html.Node) (result []*html.Node) { - set := make(map[*html.Node]bool) - for i, n := range nodes { - if vals := f(i, n); len(vals) > 0 { - result = appendWithoutDuplicates(result, vals, set) - } - } - return result -} diff --git a/vendor/github.com/PuerkitoBio/goquery/type.go b/vendor/github.com/PuerkitoBio/goquery/type.go deleted file mode 100644 index 6646c14..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/type.go +++ /dev/null @@ -1,203 +0,0 @@ -package goquery - -import ( - "errors" - "io" - "net/http" - "net/url" - - "github.com/andybalholm/cascadia" - "golang.org/x/net/html" -) - -// Document represents an HTML document to be manipulated. Unlike jQuery, which -// is loaded as part of a DOM document, and thus acts upon its containing -// document, GoQuery doesn't know which HTML document to act upon. So it needs -// to be told, and that's what the Document class is for. It holds the root -// document node to manipulate, and can make selections on this document. -type Document struct { - *Selection - Url *url.URL - rootNode *html.Node -} - -// NewDocumentFromNode is a Document constructor that takes a root html Node -// as argument. -func NewDocumentFromNode(root *html.Node) *Document { - return newDocument(root, nil) -} - -// NewDocument is a Document constructor that takes a string URL as argument. -// It loads the specified document, parses it, and stores the root Document -// node, ready to be manipulated. -// -// Deprecated: Use the net/http standard library package to make the request -// and validate the response before calling goquery.NewDocumentFromReader -// with the response's body. -func NewDocument(url string) (*Document, error) { - // Load the URL - res, e := http.Get(url) - if e != nil { - return nil, e - } - return NewDocumentFromResponse(res) -} - -// NewDocumentFromReader returns a Document from an io.Reader. -// It returns an error as second value if the reader's data cannot be parsed -// as html. It does not check if the reader is also an io.Closer, the -// provided reader is never closed by this call. It is the responsibility -// of the caller to close it if required. -func NewDocumentFromReader(r io.Reader) (*Document, error) { - root, e := html.Parse(r) - if e != nil { - return nil, e - } - return newDocument(root, nil), nil -} - -// NewDocumentFromResponse is another Document constructor that takes an http response as argument. -// It loads the specified response's document, parses it, and stores the root Document -// node, ready to be manipulated. The response's body is closed on return. -// -// Deprecated: Use goquery.NewDocumentFromReader with the response's body. -func NewDocumentFromResponse(res *http.Response) (*Document, error) { - if res == nil { - return nil, errors.New("Response is nil") - } - defer res.Body.Close() - if res.Request == nil { - return nil, errors.New("Response.Request is nil") - } - - // Parse the HTML into nodes - root, e := html.Parse(res.Body) - if e != nil { - return nil, e - } - - // Create and fill the document - return newDocument(root, res.Request.URL), nil -} - -// CloneDocument creates a deep-clone of a document. -func CloneDocument(doc *Document) *Document { - return newDocument(cloneNode(doc.rootNode), doc.Url) -} - -// Private constructor, make sure all fields are correctly filled. -func newDocument(root *html.Node, url *url.URL) *Document { - // Create and fill the document - d := &Document{nil, url, root} - d.Selection = newSingleSelection(root, d) - return d -} - -// Selection represents a collection of nodes matching some criteria. The -// initial Selection can be created by using Document.Find, and then -// manipulated using the jQuery-like chainable syntax and methods. -type Selection struct { - Nodes []*html.Node - document *Document - prevSel *Selection -} - -// Helper constructor to create an empty selection -func newEmptySelection(doc *Document) *Selection { - return &Selection{nil, doc, nil} -} - -// Helper constructor to create a selection of only one node -func newSingleSelection(node *html.Node, doc *Document) *Selection { - return &Selection{[]*html.Node{node}, doc, nil} -} - -// Matcher is an interface that defines the methods to match -// HTML nodes against a compiled selector string. Cascadia's -// Selector implements this interface. -type Matcher interface { - Match(*html.Node) bool - MatchAll(*html.Node) []*html.Node - Filter([]*html.Node) []*html.Node -} - -// Single compiles a selector string to a Matcher that stops after the first -// match is found. -// -// By default, Selection.Find and other functions that accept a selector string -// to select nodes will use all matches corresponding to that selector. By -// using the Matcher returned by Single, at most the first match will be -// selected. -// -// For example, those two statements are semantically equivalent: -// -// sel1 := doc.Find("a").First() -// sel2 := doc.FindMatcher(goquery.Single("a")) -// -// The one using Single is optimized to be potentially much faster on large -// documents. -// -// Only the behaviour of the MatchAll method of the Matcher interface is -// altered compared to standard Matchers. This means that the single-selection -// property of the Matcher only applies for Selection methods where the Matcher -// is used to select nodes, not to filter or check if a node matches the -// Matcher - in those cases, the behaviour of the Matcher is unchanged (e.g. -// FilterMatcher(Single("div")) will still result in a Selection with multiple -// "div"s if there were many "div"s in the Selection to begin with). -func Single(selector string) Matcher { - return singleMatcher{compileMatcher(selector)} -} - -// SingleMatcher returns a Matcher matches the same nodes as m, but that stops -// after the first match is found. -// -// See the documentation of function Single for more details. -func SingleMatcher(m Matcher) Matcher { - if _, ok := m.(singleMatcher); ok { - // m is already a singleMatcher - return m - } - return singleMatcher{m} -} - -// compileMatcher compiles the selector string s and returns -// the corresponding Matcher. If s is an invalid selector string, -// it returns a Matcher that fails all matches. -func compileMatcher(s string) Matcher { - cs, err := cascadia.Compile(s) - if err != nil { - return invalidMatcher{} - } - return cs -} - -type singleMatcher struct { - Matcher -} - -func (m singleMatcher) MatchAll(n *html.Node) []*html.Node { - // Optimized version - stops finding at the first match (cascadia-compiled - // matchers all use this code path). - if mm, ok := m.Matcher.(interface{ MatchFirst(*html.Node) *html.Node }); ok { - node := mm.MatchFirst(n) - if node == nil { - return nil - } - return []*html.Node{node} - } - - // Fallback version, for e.g. test mocks that don't provide the MatchFirst - // method. - nodes := m.Matcher.MatchAll(n) - if len(nodes) > 0 { - return nodes[:1:1] - } - return nil -} - -// invalidMatcher is a Matcher that always fails to match. -type invalidMatcher struct{} - -func (invalidMatcher) Match(n *html.Node) bool { return false } -func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil } -func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil } diff --git a/vendor/github.com/PuerkitoBio/goquery/utilities.go b/vendor/github.com/PuerkitoBio/goquery/utilities.go deleted file mode 100644 index 3e11b1d..0000000 --- a/vendor/github.com/PuerkitoBio/goquery/utilities.go +++ /dev/null @@ -1,171 +0,0 @@ -package goquery - -import ( - "bytes" - - "golang.org/x/net/html" -) - -// used to determine if a set (map[*html.Node]bool) should be used -// instead of iterating over a slice. The set uses more memory and -// is slower than slice iteration for small N. -const minNodesForSet = 1000 - -var nodeNames = []string{ - html.ErrorNode: "#error", - html.TextNode: "#text", - html.DocumentNode: "#document", - html.CommentNode: "#comment", -} - -// NodeName returns the node name of the first element in the selection. -// It tries to behave in a similar way as the DOM's nodeName property -// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName). -// -// Go's net/html package defines the following node types, listed with -// the corresponding returned value from this function: -// -// ErrorNode : #error -// TextNode : #text -// DocumentNode : #document -// ElementNode : the element's tag name -// CommentNode : #comment -// DoctypeNode : the name of the document type -// -func NodeName(s *Selection) string { - if s.Length() == 0 { - return "" - } - return nodeName(s.Get(0)) -} - -// nodeName returns the node name of the given html node. -// See NodeName for additional details on behaviour. -func nodeName(node *html.Node) string { - if node == nil { - return "" - } - - switch node.Type { - case html.ElementNode, html.DoctypeNode: - return node.Data - default: - if node.Type >= 0 && int(node.Type) < len(nodeNames) { - return nodeNames[node.Type] - } - return "" - } -} - -// OuterHtml returns the outer HTML rendering of the first item in -// the selection - that is, the HTML including the first element's -// tag and attributes. -// -// Unlike InnerHtml, this is a function and not a method on the Selection, -// because this is not a jQuery method (in javascript-land, this is -// a property provided by the DOM). -func OuterHtml(s *Selection) (string, error) { - var buf bytes.Buffer - - if s.Length() == 0 { - return "", nil - } - n := s.Get(0) - if err := html.Render(&buf, n); err != nil { - return "", err - } - return buf.String(), nil -} - -// Loop through all container nodes to search for the target node. -func sliceContains(container []*html.Node, contained *html.Node) bool { - for _, n := range container { - if nodeContains(n, contained) { - return true - } - } - - return false -} - -// Checks if the contained node is within the container node. -func nodeContains(container *html.Node, contained *html.Node) bool { - // Check if the parent of the contained node is the container node, traversing - // upward until the top is reached, or the container is found. - for contained = contained.Parent; contained != nil; contained = contained.Parent { - if container == contained { - return true - } - } - return false -} - -// Checks if the target node is in the slice of nodes. -func isInSlice(slice []*html.Node, node *html.Node) bool { - return indexInSlice(slice, node) > -1 -} - -// Returns the index of the target node in the slice, or -1. -func indexInSlice(slice []*html.Node, node *html.Node) int { - if node != nil { - for i, n := range slice { - if n == node { - return i - } - } - } - return -1 -} - -// Appends the new nodes to the target slice, making sure no duplicate is added. -// There is no check to the original state of the target slice, so it may still -// contain duplicates. The target slice is returned because append() may create -// a new underlying array. If targetSet is nil, a local set is created with the -// target if len(target) + len(nodes) is greater than minNodesForSet. -func appendWithoutDuplicates(target []*html.Node, nodes []*html.Node, targetSet map[*html.Node]bool) []*html.Node { - // if there are not that many nodes, don't use the map, faster to just use nested loops - // (unless a non-nil targetSet is passed, in which case the caller knows better). - if targetSet == nil && len(target)+len(nodes) < minNodesForSet { - for _, n := range nodes { - if !isInSlice(target, n) { - target = append(target, n) - } - } - return target - } - - // if a targetSet is passed, then assume it is reliable, otherwise create one - // and initialize it with the current target contents. - if targetSet == nil { - targetSet = make(map[*html.Node]bool, len(target)) - for _, n := range target { - targetSet[n] = true - } - } - for _, n := range nodes { - if !targetSet[n] { - target = append(target, n) - targetSet[n] = true - } - } - - return target -} - -// Loop through a selection, returning only those nodes that pass the predicate -// function. -func grep(sel *Selection, predicate func(i int, s *Selection) bool) (result []*html.Node) { - for i, n := range sel.Nodes { - if predicate(i, newSingleSelection(n, sel.document)) { - result = append(result, n) - } - } - return result -} - -// Creates a new Selection object based on the specified nodes, and keeps the -// source Selection object on the stack (linked list). -func pushStack(fromSel *Selection, nodes []*html.Node) *Selection { - result := &Selection{nodes, fromSel.document, fromSel} - return result -} diff --git a/vendor/github.com/andybalholm/cascadia/.travis.yml b/vendor/github.com/andybalholm/cascadia/.travis.yml deleted file mode 100644 index 6f22751..0000000 --- a/vendor/github.com/andybalholm/cascadia/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: go - -go: - - 1.3 - - 1.4 - -install: - - go get github.com/andybalholm/cascadia - -script: - - go test -v - -notifications: - email: false diff --git a/vendor/github.com/andybalholm/cascadia/BUILD.bazel b/vendor/github.com/andybalholm/cascadia/BUILD.bazel deleted file mode 100644 index 0392cac..0000000 --- a/vendor/github.com/andybalholm/cascadia/BUILD.bazel +++ /dev/null @@ -1,15 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "cascadia", - srcs = [ - "parser.go", - "selector.go", - "serialize.go", - "specificity.go", - ], - importmap = "peridot.resf.org/vendor/github.com/andybalholm/cascadia", - importpath = "github.com/andybalholm/cascadia", - visibility = ["//visibility:public"], - deps = ["@org_golang_x_net//html"], -) diff --git a/vendor/github.com/andybalholm/cascadia/LICENSE b/vendor/github.com/andybalholm/cascadia/LICENSE deleted file mode 100644 index ee5ad35..0000000 --- a/vendor/github.com/andybalholm/cascadia/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -Copyright (c) 2011 Andy Balholm. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/andybalholm/cascadia/README.md b/vendor/github.com/andybalholm/cascadia/README.md deleted file mode 100644 index 26f4c37..0000000 --- a/vendor/github.com/andybalholm/cascadia/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# cascadia - -[![](https://travis-ci.org/andybalholm/cascadia.svg)](https://travis-ci.org/andybalholm/cascadia) - -The Cascadia package implements CSS selectors for use with the parse trees produced by the html package. - -To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package. - -[Refer to godoc here](https://godoc.org/github.com/andybalholm/cascadia). diff --git a/vendor/github.com/andybalholm/cascadia/go.mod b/vendor/github.com/andybalholm/cascadia/go.mod deleted file mode 100644 index 51a330b..0000000 --- a/vendor/github.com/andybalholm/cascadia/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/andybalholm/cascadia - -require golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01 - -go 1.13 diff --git a/vendor/github.com/andybalholm/cascadia/parser.go b/vendor/github.com/andybalholm/cascadia/parser.go deleted file mode 100644 index c40a39f..0000000 --- a/vendor/github.com/andybalholm/cascadia/parser.go +++ /dev/null @@ -1,838 +0,0 @@ -// Package cascadia is an implementation of CSS selectors. -package cascadia - -import ( - "errors" - "fmt" - "regexp" - "strconv" - "strings" -) - -// a parser for CSS selectors -type parser struct { - s string // the source text - i int // the current position - - // if `false`, parsing a pseudo-element - // returns an error. - acceptPseudoElements bool -} - -// parseEscape parses a backslash escape. -func (p *parser) parseEscape() (result string, err error) { - if len(p.s) < p.i+2 || p.s[p.i] != '\\' { - return "", errors.New("invalid escape sequence") - } - - start := p.i + 1 - c := p.s[start] - switch { - case c == '\r' || c == '\n' || c == '\f': - return "", errors.New("escaped line ending outside string") - case hexDigit(c): - // unicode escape (hex) - var i int - for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ { - // empty - } - v, _ := strconv.ParseUint(p.s[start:i], 16, 21) - if len(p.s) > i { - switch p.s[i] { - case '\r': - i++ - if len(p.s) > i && p.s[i] == '\n' { - i++ - } - case ' ', '\t', '\n', '\f': - i++ - } - } - p.i = i - return string(rune(v)), nil - } - - // Return the literal character after the backslash. - result = p.s[start : start+1] - p.i += 2 - return result, nil -} - -// toLowerASCII returns s with all ASCII capital letters lowercased. -func toLowerASCII(s string) string { - var b []byte - for i := 0; i < len(s); i++ { - if c := s[i]; 'A' <= c && c <= 'Z' { - if b == nil { - b = make([]byte, len(s)) - copy(b, s) - } - b[i] = s[i] + ('a' - 'A') - } - } - - if b == nil { - return s - } - - return string(b) -} - -func hexDigit(c byte) bool { - return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' -} - -// nameStart returns whether c can be the first character of an identifier -// (not counting an initial hyphen, or an escape sequence). -func nameStart(c byte) bool { - return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 -} - -// nameChar returns whether c can be a character within an identifier -// (not counting an escape sequence). -func nameChar(c byte) bool { - return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 || - c == '-' || '0' <= c && c <= '9' -} - -// parseIdentifier parses an identifier. -func (p *parser) parseIdentifier() (result string, err error) { - startingDash := false - if len(p.s) > p.i && p.s[p.i] == '-' { - startingDash = true - p.i++ - } - - if len(p.s) <= p.i { - return "", errors.New("expected identifier, found EOF instead") - } - - if c := p.s[p.i]; !(nameStart(c) || c == '\\') { - return "", fmt.Errorf("expected identifier, found %c instead", c) - } - - result, err = p.parseName() - if startingDash && err == nil { - result = "-" + result - } - return -} - -// parseName parses a name (which is like an identifier, but doesn't have -// extra restrictions on the first character). -func (p *parser) parseName() (result string, err error) { - i := p.i -loop: - for i < len(p.s) { - c := p.s[i] - switch { - case nameChar(c): - start := i - for i < len(p.s) && nameChar(p.s[i]) { - i++ - } - result += p.s[start:i] - case c == '\\': - p.i = i - val, err := p.parseEscape() - if err != nil { - return "", err - } - i = p.i - result += val - default: - break loop - } - } - - if result == "" { - return "", errors.New("expected name, found EOF instead") - } - - p.i = i - return result, nil -} - -// parseString parses a single- or double-quoted string. -func (p *parser) parseString() (result string, err error) { - i := p.i - if len(p.s) < i+2 { - return "", errors.New("expected string, found EOF instead") - } - - quote := p.s[i] - i++ - -loop: - for i < len(p.s) { - switch p.s[i] { - case '\\': - if len(p.s) > i+1 { - switch c := p.s[i+1]; c { - case '\r': - if len(p.s) > i+2 && p.s[i+2] == '\n' { - i += 3 - continue loop - } - fallthrough - case '\n', '\f': - i += 2 - continue loop - } - } - p.i = i - val, err := p.parseEscape() - if err != nil { - return "", err - } - i = p.i - result += val - case quote: - break loop - case '\r', '\n', '\f': - return "", errors.New("unexpected end of line in string") - default: - start := i - for i < len(p.s) { - if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' { - break - } - i++ - } - result += p.s[start:i] - } - } - - if i >= len(p.s) { - return "", errors.New("EOF in string") - } - - // Consume the final quote. - i++ - - p.i = i - return result, nil -} - -// parseRegex parses a regular expression; the end is defined by encountering an -// unmatched closing ')' or ']' which is not consumed -func (p *parser) parseRegex() (rx *regexp.Regexp, err error) { - i := p.i - if len(p.s) < i+2 { - return nil, errors.New("expected regular expression, found EOF instead") - } - - // number of open parens or brackets; - // when it becomes negative, finished parsing regex - open := 0 - -loop: - for i < len(p.s) { - switch p.s[i] { - case '(', '[': - open++ - case ')', ']': - open-- - if open < 0 { - break loop - } - } - i++ - } - - if i >= len(p.s) { - return nil, errors.New("EOF in regular expression") - } - rx, err = regexp.Compile(p.s[p.i:i]) - p.i = i - return rx, err -} - -// skipWhitespace consumes whitespace characters and comments. -// It returns true if there was actually anything to skip. -func (p *parser) skipWhitespace() bool { - i := p.i - for i < len(p.s) { - switch p.s[i] { - case ' ', '\t', '\r', '\n', '\f': - i++ - continue - case '/': - if strings.HasPrefix(p.s[i:], "/*") { - end := strings.Index(p.s[i+len("/*"):], "*/") - if end != -1 { - i += end + len("/**/") - continue - } - } - } - break - } - - if i > p.i { - p.i = i - return true - } - - return false -} - -// consumeParenthesis consumes an opening parenthesis and any following -// whitespace. It returns true if there was actually a parenthesis to skip. -func (p *parser) consumeParenthesis() bool { - if p.i < len(p.s) && p.s[p.i] == '(' { - p.i++ - p.skipWhitespace() - return true - } - return false -} - -// consumeClosingParenthesis consumes a closing parenthesis and any preceding -// whitespace. It returns true if there was actually a parenthesis to skip. -func (p *parser) consumeClosingParenthesis() bool { - i := p.i - p.skipWhitespace() - if p.i < len(p.s) && p.s[p.i] == ')' { - p.i++ - return true - } - p.i = i - return false -} - -// parseTypeSelector parses a type selector (one that matches by tag name). -func (p *parser) parseTypeSelector() (result tagSelector, err error) { - tag, err := p.parseIdentifier() - if err != nil { - return - } - return tagSelector{tag: toLowerASCII(tag)}, nil -} - -// parseIDSelector parses a selector that matches by id attribute. -func (p *parser) parseIDSelector() (idSelector, error) { - if p.i >= len(p.s) { - return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead") - } - if p.s[p.i] != '#' { - return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) - } - - p.i++ - id, err := p.parseName() - if err != nil { - return idSelector{}, err - } - - return idSelector{id: id}, nil -} - -// parseClassSelector parses a selector that matches by class attribute. -func (p *parser) parseClassSelector() (classSelector, error) { - if p.i >= len(p.s) { - return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead") - } - if p.s[p.i] != '.' { - return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) - } - - p.i++ - class, err := p.parseIdentifier() - if err != nil { - return classSelector{}, err - } - - return classSelector{class: class}, nil -} - -// parseAttributeSelector parses a selector that matches by attribute value. -func (p *parser) parseAttributeSelector() (attrSelector, error) { - if p.i >= len(p.s) { - return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") - } - if p.s[p.i] != '[' { - return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) - } - - p.i++ - p.skipWhitespace() - key, err := p.parseIdentifier() - if err != nil { - return attrSelector{}, err - } - key = toLowerASCII(key) - - p.skipWhitespace() - if p.i >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") - } - - if p.s[p.i] == ']' { - p.i++ - return attrSelector{key: key, operation: ""}, nil - } - - if p.i+2 >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") - } - - op := p.s[p.i : p.i+2] - if op[0] == '=' { - op = "=" - } else if op[1] != '=' { - return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op) - } - p.i += len(op) - - p.skipWhitespace() - if p.i >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") - } - var val string - var rx *regexp.Regexp - if op == "#=" { - rx, err = p.parseRegex() - } else { - switch p.s[p.i] { - case '\'', '"': - val, err = p.parseString() - default: - val, err = p.parseIdentifier() - } - } - if err != nil { - return attrSelector{}, err - } - - p.skipWhitespace() - if p.i >= len(p.s) { - return attrSelector{}, errors.New("unexpected EOF in attribute selector") - } - if p.s[p.i] != ']' { - return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) - } - p.i++ - - switch op { - case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=": - return attrSelector{key: key, val: val, operation: op, regexp: rx}, nil - default: - return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", op) - } -} - -var errExpectedParenthesis = errors.New("expected '(' but didn't find it") -var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") -var errUnmatchedParenthesis = errors.New("unmatched '('") - -// parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element -// For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements. -// https://drafts.csswg.org/selectors-3/#pseudo-elements -// Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element. -func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) { - if p.i >= len(p.s) { - return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") - } - if p.s[p.i] != ':' { - return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) - } - - p.i++ - var mustBePseudoElement bool - if p.i >= len(p.s) { - return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)") - } - if p.s[p.i] == ':' { // we found a pseudo-element - mustBePseudoElement = true - p.i++ - } - - name, err := p.parseIdentifier() - if err != nil { - return - } - name = toLowerASCII(name) - if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" && - name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" && - name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") { - return out, "", fmt.Errorf("unknown pseudoelement :%s", name) - } - - switch name { - case "not", "has", "haschild": - if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis - } - sel, parseErr := p.parseSelectorGroup() - if parseErr != nil { - return out, "", parseErr - } - if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis - } - - out = relativePseudoClassSelector{name: name, match: sel} - - case "contains", "containsown": - if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis - } - if p.i == len(p.s) { - return out, "", errUnmatchedParenthesis - } - var val string - switch p.s[p.i] { - case '\'', '"': - val, err = p.parseString() - default: - val, err = p.parseIdentifier() - } - if err != nil { - return out, "", err - } - val = strings.ToLower(val) - p.skipWhitespace() - if p.i >= len(p.s) { - return out, "", errors.New("unexpected EOF in pseudo selector") - } - if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis - } - - out = containsPseudoClassSelector{own: name == "containsown", value: val} - - case "matches", "matchesown": - if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis - } - rx, err := p.parseRegex() - if err != nil { - return out, "", err - } - if p.i >= len(p.s) { - return out, "", errors.New("unexpected EOF in pseudo selector") - } - if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis - } - - out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx} - - case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": - if !p.consumeParenthesis() { - return out, "", errExpectedParenthesis - } - a, b, err := p.parseNth() - if err != nil { - return out, "", err - } - if !p.consumeClosingParenthesis() { - return out, "", errExpectedClosingParenthesis - } - last := name == "nth-last-child" || name == "nth-last-of-type" - ofType := name == "nth-of-type" || name == "nth-last-of-type" - out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType} - - case "first-child": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false} - case "last-child": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true} - case "first-of-type": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false} - case "last-of-type": - out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true} - case "only-child": - out = onlyChildPseudoClassSelector{ofType: false} - case "only-of-type": - out = onlyChildPseudoClassSelector{ofType: true} - case "input": - out = inputPseudoClassSelector{} - case "empty": - out = emptyElementPseudoClassSelector{} - case "root": - out = rootPseudoClassSelector{} - case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error": - return nil, name, nil - default: - return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) - } - return -} - -// parseInteger parses a decimal integer. -func (p *parser) parseInteger() (int, error) { - i := p.i - start := i - for i < len(p.s) && '0' <= p.s[i] && p.s[i] <= '9' { - i++ - } - if i == start { - return 0, errors.New("expected integer, but didn't find it") - } - p.i = i - - val, err := strconv.Atoi(p.s[start:i]) - if err != nil { - return 0, err - } - - return val, nil -} - -// parseNth parses the argument for :nth-child (normally of the form an+b). -func (p *parser) parseNth() (a, b int, err error) { - // initial state - if p.i >= len(p.s) { - goto eof - } - switch p.s[p.i] { - case '-': - p.i++ - goto negativeA - case '+': - p.i++ - goto positiveA - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - goto positiveA - case 'n', 'N': - a = 1 - p.i++ - goto readN - case 'o', 'O', 'e', 'E': - id, nameErr := p.parseName() - if nameErr != nil { - return 0, 0, nameErr - } - id = toLowerASCII(id) - if id == "odd" { - return 2, 1, nil - } - if id == "even" { - return 2, 0, nil - } - return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id) - default: - goto invalid - } - -positiveA: - if p.i >= len(p.s) { - goto eof - } - switch p.s[p.i] { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - a, err = p.parseInteger() - if err != nil { - return 0, 0, err - } - goto readA - case 'n', 'N': - a = 1 - p.i++ - goto readN - default: - goto invalid - } - -negativeA: - if p.i >= len(p.s) { - goto eof - } - switch p.s[p.i] { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - a, err = p.parseInteger() - if err != nil { - return 0, 0, err - } - a = -a - goto readA - case 'n', 'N': - a = -1 - p.i++ - goto readN - default: - goto invalid - } - -readA: - if p.i >= len(p.s) { - goto eof - } - switch p.s[p.i] { - case 'n', 'N': - p.i++ - goto readN - default: - // The number we read as a is actually b. - return 0, a, nil - } - -readN: - p.skipWhitespace() - if p.i >= len(p.s) { - goto eof - } - switch p.s[p.i] { - case '+': - p.i++ - p.skipWhitespace() - b, err = p.parseInteger() - if err != nil { - return 0, 0, err - } - return a, b, nil - case '-': - p.i++ - p.skipWhitespace() - b, err = p.parseInteger() - if err != nil { - return 0, 0, err - } - return a, -b, nil - default: - return a, 0, nil - } - -eof: - return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b") - -invalid: - return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b") -} - -// parseSimpleSelectorSequence parses a selector sequence that applies to -// a single element. -func (p *parser) parseSimpleSelectorSequence() (Sel, error) { - var selectors []Sel - - if p.i >= len(p.s) { - return nil, errors.New("expected selector, found EOF instead") - } - - switch p.s[p.i] { - case '*': - // It's the universal selector. Just skip over it, since it doesn't affect the meaning. - p.i++ - case '#', '.', '[', ':': - // There's no type selector. Wait to process the other till the main loop. - default: - r, err := p.parseTypeSelector() - if err != nil { - return nil, err - } - selectors = append(selectors, r) - } - - var pseudoElement string -loop: - for p.i < len(p.s) { - var ( - ns Sel - newPseudoElement string - err error - ) - switch p.s[p.i] { - case '#': - ns, err = p.parseIDSelector() - case '.': - ns, err = p.parseClassSelector() - case '[': - ns, err = p.parseAttributeSelector() - case ':': - ns, newPseudoElement, err = p.parsePseudoclassSelector() - default: - break loop - } - if err != nil { - return nil, err - } - // From https://drafts.csswg.org/selectors-3/#pseudo-elements : - // "Only one pseudo-element may appear per selector, and if present - // it must appear after the sequence of simple selectors that - // represents the subjects of the selector."" - if ns == nil { // we found a pseudo-element - if pseudoElement != "" { - return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement) - } - if !p.acceptPseudoElements { - return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement) - } - pseudoElement = newPseudoElement - } else { - if pseudoElement != "" { - return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement) - } - selectors = append(selectors, ns) - } - - } - if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector - return selectors[0], nil - } - return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil -} - -// parseSelector parses a selector that may include combinators. -func (p *parser) parseSelector() (Sel, error) { - p.skipWhitespace() - result, err := p.parseSimpleSelectorSequence() - if err != nil { - return nil, err - } - - for { - var ( - combinator byte - c Sel - ) - if p.skipWhitespace() { - combinator = ' ' - } - if p.i >= len(p.s) { - return result, nil - } - - switch p.s[p.i] { - case '+', '>', '~': - combinator = p.s[p.i] - p.i++ - p.skipWhitespace() - case ',', ')': - // These characters can't begin a selector, but they can legally occur after one. - return result, nil - } - - if combinator == 0 { - return result, nil - } - - c, err = p.parseSimpleSelectorSequence() - if err != nil { - return nil, err - } - result = combinedSelector{first: result, combinator: combinator, second: c} - } -} - -// parseSelectorGroup parses a group of selectors, separated by commas. -func (p *parser) parseSelectorGroup() (SelectorGroup, error) { - current, err := p.parseSelector() - if err != nil { - return nil, err - } - result := SelectorGroup{current} - - for p.i < len(p.s) { - if p.s[p.i] != ',' { - break - } - p.i++ - c, err := p.parseSelector() - if err != nil { - return nil, err - } - result = append(result, c) - } - return result, nil -} diff --git a/vendor/github.com/andybalholm/cascadia/selector.go b/vendor/github.com/andybalholm/cascadia/selector.go deleted file mode 100644 index e2a6dc4..0000000 --- a/vendor/github.com/andybalholm/cascadia/selector.go +++ /dev/null @@ -1,938 +0,0 @@ -package cascadia - -import ( - "bytes" - "fmt" - "regexp" - "strings" - - "golang.org/x/net/html" -) - -// Matcher is the interface for basic selector functionality. -// Match returns whether a selector matches n. -type Matcher interface { - Match(n *html.Node) bool -} - -// Sel is the interface for all the functionality provided by selectors. -type Sel interface { - Matcher - Specificity() Specificity - - // Returns a CSS input compiling to this selector. - String() string - - // Returns a pseudo-element, or an empty string. - PseudoElement() string -} - -// Parse parses a selector. Use `ParseWithPseudoElement` -// if you need support for pseudo-elements. -func Parse(sel string) (Sel, error) { - p := &parser{s: sel} - compiled, err := p.parseSelector() - if err != nil { - return nil, err - } - - if p.i < len(sel) { - return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) - } - - return compiled, nil -} - -// ParseWithPseudoElement parses a single selector, -// with support for pseudo-element. -func ParseWithPseudoElement(sel string) (Sel, error) { - p := &parser{s: sel, acceptPseudoElements: true} - compiled, err := p.parseSelector() - if err != nil { - return nil, err - } - - if p.i < len(sel) { - return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) - } - - return compiled, nil -} - -// ParseGroup parses a selector, or a group of selectors separated by commas. -// Use `ParseGroupWithPseudoElements` -// if you need support for pseudo-elements. -func ParseGroup(sel string) (SelectorGroup, error) { - p := &parser{s: sel} - compiled, err := p.parseSelectorGroup() - if err != nil { - return nil, err - } - - if p.i < len(sel) { - return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) - } - - return compiled, nil -} - -// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas. -// It supports pseudo-elements. -func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) { - p := &parser{s: sel, acceptPseudoElements: true} - compiled, err := p.parseSelectorGroup() - if err != nil { - return nil, err - } - - if p.i < len(sel) { - return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) - } - - return compiled, nil -} - -// A Selector is a function which tells whether a node matches or not. -// -// This type is maintained for compatibility; I recommend using the newer and -// more idiomatic interfaces Sel and Matcher. -type Selector func(*html.Node) bool - -// Compile parses a selector and returns, if successful, a Selector object -// that can be used to match against html.Node objects. -func Compile(sel string) (Selector, error) { - compiled, err := ParseGroup(sel) - if err != nil { - return nil, err - } - - return Selector(compiled.Match), nil -} - -// MustCompile is like Compile, but panics instead of returning an error. -func MustCompile(sel string) Selector { - compiled, err := Compile(sel) - if err != nil { - panic(err) - } - return compiled -} - -// MatchAll returns a slice of the nodes that match the selector, -// from n and its children. -func (s Selector) MatchAll(n *html.Node) []*html.Node { - return s.matchAllInto(n, nil) -} - -func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node { - if s(n) { - storage = append(storage, n) - } - - for child := n.FirstChild; child != nil; child = child.NextSibling { - storage = s.matchAllInto(child, storage) - } - - return storage -} - -func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node { - for child := n.FirstChild; child != nil; child = child.NextSibling { - if m.Match(child) { - storage = append(storage, child) - } - storage = queryInto(child, m, storage) - } - - return storage -} - -// QueryAll returns a slice of all the nodes that match m, from the descendants -// of n. -func QueryAll(n *html.Node, m Matcher) []*html.Node { - return queryInto(n, m, nil) -} - -// Match returns true if the node matches the selector. -func (s Selector) Match(n *html.Node) bool { - return s(n) -} - -// MatchFirst returns the first node that matches s, from n and its children. -func (s Selector) MatchFirst(n *html.Node) *html.Node { - if s.Match(n) { - return n - } - - for c := n.FirstChild; c != nil; c = c.NextSibling { - m := s.MatchFirst(c) - if m != nil { - return m - } - } - return nil -} - -// Query returns the first node that matches m, from the descendants of n. -// If none matches, it returns nil. -func Query(n *html.Node, m Matcher) *html.Node { - for c := n.FirstChild; c != nil; c = c.NextSibling { - if m.Match(c) { - return c - } - if matched := Query(c, m); matched != nil { - return matched - } - } - - return nil -} - -// Filter returns the nodes in nodes that match the selector. -func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) { - for _, n := range nodes { - if s(n) { - result = append(result, n) - } - } - return result -} - -// Filter returns the nodes that match m. -func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) { - for _, n := range nodes { - if m.Match(n) { - result = append(result, n) - } - } - return result -} - -type tagSelector struct { - tag string -} - -// Matches elements with a given tag name. -func (t tagSelector) Match(n *html.Node) bool { - return n.Type == html.ElementNode && n.Data == t.tag -} - -func (c tagSelector) Specificity() Specificity { - return Specificity{0, 0, 1} -} - -func (c tagSelector) PseudoElement() string { - return "" -} - -type classSelector struct { - class string -} - -// Matches elements by class attribute. -func (t classSelector) Match(n *html.Node) bool { - return matchAttribute(n, "class", func(s string) bool { - return matchInclude(t.class, s) - }) -} - -func (c classSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c classSelector) PseudoElement() string { - return "" -} - -type idSelector struct { - id string -} - -// Matches elements by id attribute. -func (t idSelector) Match(n *html.Node) bool { - return matchAttribute(n, "id", func(s string) bool { - return s == t.id - }) -} - -func (c idSelector) Specificity() Specificity { - return Specificity{1, 0, 0} -} - -func (c idSelector) PseudoElement() string { - return "" -} - -type attrSelector struct { - key, val, operation string - regexp *regexp.Regexp -} - -// Matches elements by attribute value. -func (t attrSelector) Match(n *html.Node) bool { - switch t.operation { - case "": - return matchAttribute(n, t.key, func(string) bool { return true }) - case "=": - return matchAttribute(n, t.key, func(s string) bool { return s == t.val }) - case "!=": - return attributeNotEqualMatch(t.key, t.val, n) - case "~=": - // matches elements where the attribute named key is a whitespace-separated list that includes val. - return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s) }) - case "|=": - return attributeDashMatch(t.key, t.val, n) - case "^=": - return attributePrefixMatch(t.key, t.val, n) - case "$=": - return attributeSuffixMatch(t.key, t.val, n) - case "*=": - return attributeSubstringMatch(t.key, t.val, n) - case "#=": - return attributeRegexMatch(t.key, t.regexp, n) - default: - panic(fmt.Sprintf("unsuported operation : %s", t.operation)) - } -} - -// matches elements where the attribute named key satisifes the function f. -func matchAttribute(n *html.Node, key string, f func(string) bool) bool { - if n.Type != html.ElementNode { - return false - } - for _, a := range n.Attr { - if a.Key == key && f(a.Val) { - return true - } - } - return false -} - -// attributeNotEqualMatch matches elements where -// the attribute named key does not have the value val. -func attributeNotEqualMatch(key, val string, n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - for _, a := range n.Attr { - if a.Key == key && a.Val == val { - return false - } - } - return true -} - -// returns true if s is a whitespace-separated list that includes val. -func matchInclude(val, s string) bool { - for s != "" { - i := strings.IndexAny(s, " \t\r\n\f") - if i == -1 { - return s == val - } - if s[:i] == val { - return true - } - s = s[i+1:] - } - return false -} - -// matches elements where the attribute named key equals val or starts with val plus a hyphen. -func attributeDashMatch(key, val string, n *html.Node) bool { - return matchAttribute(n, key, - func(s string) bool { - if s == val { - return true - } - if len(s) <= len(val) { - return false - } - if s[:len(val)] == val && s[len(val)] == '-' { - return true - } - return false - }) -} - -// attributePrefixMatch returns a Selector that matches elements where -// the attribute named key starts with val. -func attributePrefixMatch(key, val string, n *html.Node) bool { - return matchAttribute(n, key, - func(s string) bool { - if strings.TrimSpace(s) == "" { - return false - } - return strings.HasPrefix(s, val) - }) -} - -// attributeSuffixMatch matches elements where -// the attribute named key ends with val. -func attributeSuffixMatch(key, val string, n *html.Node) bool { - return matchAttribute(n, key, - func(s string) bool { - if strings.TrimSpace(s) == "" { - return false - } - return strings.HasSuffix(s, val) - }) -} - -// attributeSubstringMatch matches nodes where -// the attribute named key contains val. -func attributeSubstringMatch(key, val string, n *html.Node) bool { - return matchAttribute(n, key, - func(s string) bool { - if strings.TrimSpace(s) == "" { - return false - } - return strings.Contains(s, val) - }) -} - -// attributeRegexMatch matches nodes where -// the attribute named key matches the regular expression rx -func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool { - return matchAttribute(n, key, - func(s string) bool { - return rx.MatchString(s) - }) -} - -func (c attrSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c attrSelector) PseudoElement() string { - return "" -} - -// ---------------- Pseudo class selectors ---------------- -// we use severals concrete types of pseudo-class selectors - -type relativePseudoClassSelector struct { - name string // one of "not", "has", "haschild" - match SelectorGroup -} - -func (s relativePseudoClassSelector) Match(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - switch s.name { - case "not": - // matches elements that do not match a. - return !s.match.Match(n) - case "has": - // matches elements with any descendant that matches a. - return hasDescendantMatch(n, s.match) - case "haschild": - // matches elements with a child that matches a. - return hasChildMatch(n, s.match) - default: - panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name)) - } -} - -// hasChildMatch returns whether n has any child that matches a. -func hasChildMatch(n *html.Node, a Matcher) bool { - for c := n.FirstChild; c != nil; c = c.NextSibling { - if a.Match(c) { - return true - } - } - return false -} - -// hasDescendantMatch performs a depth-first search of n's descendants, -// testing whether any of them match a. It returns true as soon as a match is -// found, or false if no match is found. -func hasDescendantMatch(n *html.Node, a Matcher) bool { - for c := n.FirstChild; c != nil; c = c.NextSibling { - if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { - return true - } - } - return false -} - -// Specificity returns the specificity of the most specific selectors -// in the pseudo-class arguments. -// See https://www.w3.org/TR/selectors/#specificity-rules -func (s relativePseudoClassSelector) Specificity() Specificity { - var max Specificity - for _, sel := range s.match { - newSpe := sel.Specificity() - if max.Less(newSpe) { - max = newSpe - } - } - return max -} - -func (c relativePseudoClassSelector) PseudoElement() string { - return "" -} - -type containsPseudoClassSelector struct { - own bool - value string -} - -func (s containsPseudoClassSelector) Match(n *html.Node) bool { - var text string - if s.own { - // matches nodes that directly contain the given text - text = strings.ToLower(nodeOwnText(n)) - } else { - // matches nodes that contain the given text. - text = strings.ToLower(nodeText(n)) - } - return strings.Contains(text, s.value) -} - -func (s containsPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c containsPseudoClassSelector) PseudoElement() string { - return "" -} - -type regexpPseudoClassSelector struct { - own bool - regexp *regexp.Regexp -} - -func (s regexpPseudoClassSelector) Match(n *html.Node) bool { - var text string - if s.own { - // matches nodes whose text directly matches the specified regular expression - text = nodeOwnText(n) - } else { - // matches nodes whose text matches the specified regular expression - text = nodeText(n) - } - return s.regexp.MatchString(text) -} - -// writeNodeText writes the text contained in n and its descendants to b. -func writeNodeText(n *html.Node, b *bytes.Buffer) { - switch n.Type { - case html.TextNode: - b.WriteString(n.Data) - case html.ElementNode: - for c := n.FirstChild; c != nil; c = c.NextSibling { - writeNodeText(c, b) - } - } -} - -// nodeText returns the text contained in n and its descendants. -func nodeText(n *html.Node) string { - var b bytes.Buffer - writeNodeText(n, &b) - return b.String() -} - -// nodeOwnText returns the contents of the text nodes that are direct -// children of n. -func nodeOwnText(n *html.Node) string { - var b bytes.Buffer - for c := n.FirstChild; c != nil; c = c.NextSibling { - if c.Type == html.TextNode { - b.WriteString(c.Data) - } - } - return b.String() -} - -func (s regexpPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c regexpPseudoClassSelector) PseudoElement() string { - return "" -} - -type nthPseudoClassSelector struct { - a, b int - last, ofType bool -} - -func (s nthPseudoClassSelector) Match(n *html.Node) bool { - if s.a == 0 { - if s.last { - return simpleNthLastChildMatch(s.b, s.ofType, n) - } else { - return simpleNthChildMatch(s.b, s.ofType, n) - } - } - return nthChildMatch(s.a, s.b, s.last, s.ofType, n) -} - -// nthChildMatch implements :nth-child(an+b). -// If last is true, implements :nth-last-child instead. -// If ofType is true, implements :nth-of-type instead. -func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - - parent := n.Parent - if parent == nil { - return false - } - - if parent.Type == html.DocumentNode { - return false - } - - i := -1 - count := 0 - for c := parent.FirstChild; c != nil; c = c.NextSibling { - if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { - continue - } - count++ - if c == n { - i = count - if !last { - break - } - } - } - - if i == -1 { - // This shouldn't happen, since n should always be one of its parent's children. - return false - } - - if last { - i = count - i + 1 - } - - i -= b - if a == 0 { - return i == 0 - } - - return i%a == 0 && i/a >= 0 -} - -// simpleNthChildMatch implements :nth-child(b). -// If ofType is true, implements :nth-of-type instead. -func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - - parent := n.Parent - if parent == nil { - return false - } - - if parent.Type == html.DocumentNode { - return false - } - - count := 0 - for c := parent.FirstChild; c != nil; c = c.NextSibling { - if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { - continue - } - count++ - if c == n { - return count == b - } - if count >= b { - return false - } - } - return false -} - -// simpleNthLastChildMatch implements :nth-last-child(b). -// If ofType is true, implements :nth-last-of-type instead. -func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - - parent := n.Parent - if parent == nil { - return false - } - - if parent.Type == html.DocumentNode { - return false - } - - count := 0 - for c := parent.LastChild; c != nil; c = c.PrevSibling { - if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { - continue - } - count++ - if c == n { - return count == b - } - if count >= b { - return false - } - } - return false -} - -// Specificity for nth-child pseudo-class. -// Does not support a list of selectors -func (s nthPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c nthPseudoClassSelector) PseudoElement() string { - return "" -} - -type onlyChildPseudoClassSelector struct { - ofType bool -} - -// Match implements :only-child. -// If `ofType` is true, it implements :only-of-type instead. -func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - - parent := n.Parent - if parent == nil { - return false - } - - if parent.Type == html.DocumentNode { - return false - } - - count := 0 - for c := parent.FirstChild; c != nil; c = c.NextSibling { - if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) { - continue - } - count++ - if count > 1 { - return false - } - } - - return count == 1 -} - -func (s onlyChildPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c onlyChildPseudoClassSelector) PseudoElement() string { - return "" -} - -type inputPseudoClassSelector struct{} - -// Matches input, select, textarea and button elements. -func (s inputPseudoClassSelector) Match(n *html.Node) bool { - return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") -} - -func (s inputPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c inputPseudoClassSelector) PseudoElement() string { - return "" -} - -type emptyElementPseudoClassSelector struct{} - -// Matches empty elements. -func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - - for c := n.FirstChild; c != nil; c = c.NextSibling { - switch c.Type { - case html.ElementNode, html.TextNode: - return false - } - } - - return true -} - -func (s emptyElementPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c emptyElementPseudoClassSelector) PseudoElement() string { - return "" -} - -type rootPseudoClassSelector struct{} - -// Match implements :root -func (s rootPseudoClassSelector) Match(n *html.Node) bool { - if n.Type != html.ElementNode { - return false - } - if n.Parent == nil { - return false - } - return n.Parent.Type == html.DocumentNode -} - -func (s rootPseudoClassSelector) Specificity() Specificity { - return Specificity{0, 1, 0} -} - -func (c rootPseudoClassSelector) PseudoElement() string { - return "" -} - -type compoundSelector struct { - selectors []Sel - pseudoElement string -} - -// Matches elements if each sub-selectors matches. -func (t compoundSelector) Match(n *html.Node) bool { - if len(t.selectors) == 0 { - return n.Type == html.ElementNode - } - - for _, sel := range t.selectors { - if !sel.Match(n) { - return false - } - } - return true -} - -func (s compoundSelector) Specificity() Specificity { - var out Specificity - for _, sel := range s.selectors { - out = out.Add(sel.Specificity()) - } - if s.pseudoElement != "" { - // https://drafts.csswg.org/selectors-3/#specificity - out = out.Add(Specificity{0, 0, 1}) - } - return out -} - -func (c compoundSelector) PseudoElement() string { - return c.pseudoElement -} - -type combinedSelector struct { - first Sel - combinator byte - second Sel -} - -func (t combinedSelector) Match(n *html.Node) bool { - if t.first == nil { - return false // maybe we should panic - } - switch t.combinator { - case 0: - return t.first.Match(n) - case ' ': - return descendantMatch(t.first, t.second, n) - case '>': - return childMatch(t.first, t.second, n) - case '+': - return siblingMatch(t.first, t.second, true, n) - case '~': - return siblingMatch(t.first, t.second, false, n) - default: - panic("unknown combinator") - } -} - -// matches an element if it matches d and has an ancestor that matches a. -func descendantMatch(a, d Matcher, n *html.Node) bool { - if !d.Match(n) { - return false - } - - for p := n.Parent; p != nil; p = p.Parent { - if a.Match(p) { - return true - } - } - - return false -} - -// matches an element if it matches d and its parent matches a. -func childMatch(a, d Matcher, n *html.Node) bool { - return d.Match(n) && n.Parent != nil && a.Match(n.Parent) -} - -// matches an element if it matches s2 and is preceded by an element that matches s1. -// If adjacent is true, the sibling must be immediately before the element. -func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool { - if !s2.Match(n) { - return false - } - - if adjacent { - for n = n.PrevSibling; n != nil; n = n.PrevSibling { - if n.Type == html.TextNode || n.Type == html.CommentNode { - continue - } - return s1.Match(n) - } - return false - } - - // Walk backwards looking for element that matches s1 - for c := n.PrevSibling; c != nil; c = c.PrevSibling { - if s1.Match(c) { - return true - } - } - - return false -} - -func (s combinedSelector) Specificity() Specificity { - spec := s.first.Specificity() - if s.second != nil { - spec = spec.Add(s.second.Specificity()) - } - return spec -} - -// on combinedSelector, a pseudo-element only makes sens on the last -// selector, although others increase specificity. -func (c combinedSelector) PseudoElement() string { - if c.second == nil { - return "" - } - return c.second.PseudoElement() -} - -// A SelectorGroup is a list of selectors, which matches if any of the -// individual selectors matches. -type SelectorGroup []Sel - -// Match returns true if the node matches one of the single selectors. -func (s SelectorGroup) Match(n *html.Node) bool { - for _, sel := range s { - if sel.Match(n) { - return true - } - } - return false -} diff --git a/vendor/github.com/andybalholm/cascadia/serialize.go b/vendor/github.com/andybalholm/cascadia/serialize.go deleted file mode 100644 index f15b079..0000000 --- a/vendor/github.com/andybalholm/cascadia/serialize.go +++ /dev/null @@ -1,120 +0,0 @@ -package cascadia - -import ( - "fmt" - "strings" -) - -// implements the reverse operation Sel -> string - -func (c tagSelector) String() string { - return c.tag -} - -func (c idSelector) String() string { - return "#" + c.id -} - -func (c classSelector) String() string { - return "." + c.class -} - -func (c attrSelector) String() string { - val := c.val - if c.operation == "#=" { - val = c.regexp.String() - } else if c.operation != "" { - val = fmt.Sprintf(`"%s"`, val) - } - return fmt.Sprintf(`[%s%s%s]`, c.key, c.operation, val) -} - -func (c relativePseudoClassSelector) String() string { - return fmt.Sprintf(":%s(%s)", c.name, c.match.String()) -} -func (c containsPseudoClassSelector) String() string { - s := "contains" - if c.own { - s += "Own" - } - return fmt.Sprintf(`:%s("%s")`, s, c.value) -} -func (c regexpPseudoClassSelector) String() string { - s := "matches" - if c.own { - s += "Own" - } - return fmt.Sprintf(":%s(%s)", s, c.regexp.String()) -} -func (c nthPseudoClassSelector) String() string { - if c.a == 0 && c.b == 1 { // special cases - s := ":first-" - if c.last { - s = ":last-" - } - if c.ofType { - s += "of-type" - } else { - s += "child" - } - return s - } - var name string - switch [2]bool{c.last, c.ofType} { - case [2]bool{true, true}: - name = "nth-last-of-type" - case [2]bool{true, false}: - name = "nth-last-child" - case [2]bool{false, true}: - name = "nth-of-type" - case [2]bool{false, false}: - name = "nth-child" - } - return fmt.Sprintf(":%s(%dn+%d)", name, c.a, c.b) -} -func (c onlyChildPseudoClassSelector) String() string { - if c.ofType { - return ":only-of-type" - } - return ":only-child" -} -func (c inputPseudoClassSelector) String() string { - return ":input" -} -func (c emptyElementPseudoClassSelector) String() string { - return ":empty" -} -func (c rootPseudoClassSelector) String() string { - return ":root" -} - -func (c compoundSelector) String() string { - if len(c.selectors) == 0 && c.pseudoElement == "" { - return "*" - } - chunks := make([]string, len(c.selectors)) - for i, sel := range c.selectors { - chunks[i] = sel.String() - } - s := strings.Join(chunks, "") - if c.pseudoElement != "" { - s += "::" + c.pseudoElement - } - return s -} - -func (c combinedSelector) String() string { - start := c.first.String() - if c.second != nil { - start += fmt.Sprintf(" %s %s", string(c.combinator), c.second.String()) - } - return start -} - -func (c SelectorGroup) String() string { - ck := make([]string, len(c)) - for i, s := range c { - ck[i] = s.String() - } - return strings.Join(ck, ", ") -} diff --git a/vendor/github.com/andybalholm/cascadia/specificity.go b/vendor/github.com/andybalholm/cascadia/specificity.go deleted file mode 100644 index 8db864f..0000000 --- a/vendor/github.com/andybalholm/cascadia/specificity.go +++ /dev/null @@ -1,26 +0,0 @@ -package cascadia - -// Specificity is the CSS specificity as defined in -// https://www.w3.org/TR/selectors/#specificity-rules -// with the convention Specificity = [A,B,C]. -type Specificity [3]int - -// returns `true` if s < other (strictly), false otherwise -func (s Specificity) Less(other Specificity) bool { - for i := range s { - if s[i] < other[i] { - return true - } - if s[i] > other[i] { - return false - } - } - return false -} - -func (s Specificity) Add(other Specificity) Specificity { - for i, sp := range other { - s[i] += sp - } - return s -} diff --git a/vendor/github.com/antchfx/htmlquery/.gitignore b/vendor/github.com/antchfx/htmlquery/.gitignore deleted file mode 100644 index 4d5d27b..0000000 --- a/vendor/github.com/antchfx/htmlquery/.gitignore +++ /dev/null @@ -1,32 +0,0 @@ -# vscode -.vscode -debug -*.test - -./build - -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test -*.prof \ No newline at end of file diff --git a/vendor/github.com/antchfx/htmlquery/.travis.yml b/vendor/github.com/antchfx/htmlquery/.travis.yml deleted file mode 100644 index 86da84a..0000000 --- a/vendor/github.com/antchfx/htmlquery/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -language: go - -go: - - 1.9.x - - 1.12.x - - 1.13.x - -install: - - go get golang.org/x/net/html/charset - - go get golang.org/x/net/html - - go get github.com/antchfx/xpath - - go get github.com/mattn/goveralls - - go get github.com/golang/groupcache - -script: - - $HOME/gopath/bin/goveralls -service=travis-ci \ No newline at end of file diff --git a/vendor/github.com/antchfx/htmlquery/BUILD.bazel b/vendor/github.com/antchfx/htmlquery/BUILD.bazel deleted file mode 100644 index 8dbbece..0000000 --- a/vendor/github.com/antchfx/htmlquery/BUILD.bazel +++ /dev/null @@ -1,18 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "htmlquery", - srcs = [ - "cache.go", - "query.go", - ], - importmap = "peridot.resf.org/vendor/github.com/antchfx/htmlquery", - importpath = "github.com/antchfx/htmlquery", - visibility = ["//visibility:public"], - deps = [ - "//vendor/github.com/antchfx/xpath", - "//vendor/github.com/golang/groupcache/lru", - "@org_golang_x_net//html", - "@org_golang_x_net//html/charset", - ], -) diff --git a/vendor/github.com/antchfx/htmlquery/LICENSE b/vendor/github.com/antchfx/htmlquery/LICENSE deleted file mode 100644 index e14c371..0000000 --- a/vendor/github.com/antchfx/htmlquery/LICENSE +++ /dev/null @@ -1,17 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/vendor/github.com/antchfx/htmlquery/README.md b/vendor/github.com/antchfx/htmlquery/README.md deleted file mode 100644 index 6e7775d..0000000 --- a/vendor/github.com/antchfx/htmlquery/README.md +++ /dev/null @@ -1,168 +0,0 @@ -htmlquery -==== -[![Build Status](https://travis-ci.org/antchfx/htmlquery.svg?branch=master)](https://travis-ci.org/antchfx/htmlquery) -[![Coverage Status](https://coveralls.io/repos/github/antchfx/htmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/htmlquery?branch=master) -[![GoDoc](https://godoc.org/github.com/antchfx/htmlquery?status.svg)](https://godoc.org/github.com/antchfx/htmlquery) -[![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/htmlquery)](https://goreportcard.com/report/github.com/antchfx/htmlquery) - -Overview -==== - -`htmlquery` is an XPath query package for HTML, lets you extract data or evaluate from HTML documents by an XPath expression. - -`htmlquery` built-in the query object caching feature based on [LRU](https://godoc.org/github.com/golang/groupcache/lru), this feature will caching the recently used XPATH query string. Enable query caching can avoid re-compile XPath expression each query. - -Installation -==== - -``` -go get github.com/antchfx/htmlquery -``` - -Getting Started -==== - -#### Query, returns matched elements or error. - -```go -nodes, err := htmlquery.QueryAll(doc, "//a") -if err != nil { - panic(`not a valid XPath expression.`) -} -``` - -#### Load HTML document from URL. - -```go -doc, err := htmlquery.LoadURL("http://example.com/") -``` - -#### Load HTML from document. - -```go -filePath := "/home/user/sample.html" -doc, err := htmlquery.LoadDoc(filePath) -``` - -#### Load HTML document from string. - -```go -s := `....` -doc, err := htmlquery.Parse(strings.NewReader(s)) -``` - -#### Find all A elements. - -```go -list := htmlquery.Find(doc, "//a") -``` - -#### Find all A elements that have `href` attribute. - -```go -list := range htmlquery.Find(doc, "//a[@href]") -``` - -#### Find all A elements with `href` attribute and only return `href` value. - -```go -list := range htmlquery.Find(doc, "//a/@href") -for n := range list{ - fmt.Println(htmlquery.InnerText(n)) // output @href value without A element. -} -``` - -### Find the third A element. - -```go -a := htmlquery.FindOne(doc, "//a[3]") -``` - -#### Evaluate the number of all IMG element. - -```go -expr, _ := xpath.Compile("count(//img)") -v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64) -fmt.Printf("total count is %f", v) -``` - - -FAQ -==== - -#### `Find()` vs `QueryAll()`, which is better? - -`Find` and `QueryAll` both do the same things, searches all of matched html nodes. -The `Find` will panics if you give an error XPath query, but `QueryAll` will return an error for you. - -#### Can I save my query expression object for the next query? - -Yes, you can. We offer the `QuerySelector` and `QuerySelectorAll` methods, It will accept your query expression object. - -Cache a query expression object(or reused) will avoid re-compile XPath query expression, improve your query performance. - -#### XPath query object cache performance - -``` -goos: windows -goarch: amd64 -pkg: github.com/antchfx/htmlquery -BenchmarkSelectorCache-4 20000000 55.2 ns/op -BenchmarkDisableSelectorCache-4 500000 3162 ns/op -``` - -#### How to disable caching? - -``` -htmlquery.DisableSelectorCache = true -``` - -Changelogs -=== - -2019-11-19 -- Add built-in query object cache feature, avoid re-compilation for the same query string. [#16](https://github.com/antchfx/htmlquery/issues/16) -- Added LoadDoc [18](https://github.com/antchfx/htmlquery/pull/18) - -2019-10-05 -- Add new methods that compatible with invalid XPath expression error: `QueryAll` and `Query`. -- Add `QuerySelector` and `QuerySelectorAll` methods, supported reused your query object. - -2019-02-04 -- [#7](https://github.com/antchfx/htmlquery/issues/7) Removed deprecated `FindEach()` and `FindEachWithBreak()` methods. - -2018-12-28 -- Avoid adding duplicate elements to list for `Find()` method. [#6](https://github.com/antchfx/htmlquery/issues/6) - -Tutorial -=== - -```go -func main() { - doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang") - if err != nil { - panic(err) - } - // Find all news item. - list, err := htmlquery.QueryAll(doc, "//ol/li") - if err != nil { - panic(err) - } - for i, n := range list { - a := htmlquery.FindOne(n, "//a") - fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href")) - } -} -``` - -List of supported XPath query packages -=== -| Name | Description | -| ------------------------------------------------- | ----------------------------------------- | -| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document | -| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document | -| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document | - -Questions -=== -Please let me know if you have any questions. diff --git a/vendor/github.com/antchfx/htmlquery/cache.go b/vendor/github.com/antchfx/htmlquery/cache.go deleted file mode 100644 index e27cd28..0000000 --- a/vendor/github.com/antchfx/htmlquery/cache.go +++ /dev/null @@ -1,42 +0,0 @@ -package htmlquery - -import ( - "sync" - - "github.com/antchfx/xpath" - "github.com/golang/groupcache/lru" -) - -// DisableSelectorCache will disable caching for the query selector if value is true. -var DisableSelectorCache = false - -// SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50. -// Will disable caching if SelectorCacheMaxEntries <= 0. -var SelectorCacheMaxEntries = 50 - -var ( - cacheOnce sync.Once - cache *lru.Cache - cacheMutex sync.Mutex -) - -func getQuery(expr string) (*xpath.Expr, error) { - if DisableSelectorCache || SelectorCacheMaxEntries <= 0 { - return xpath.Compile(expr) - } - cacheOnce.Do(func() { - cache = lru.New(SelectorCacheMaxEntries) - }) - cacheMutex.Lock() - defer cacheMutex.Unlock() - if v, ok := cache.Get(expr); ok { - return v.(*xpath.Expr), nil - } - v, err := xpath.Compile(expr) - if err != nil { - return nil, err - } - cache.Add(expr, v) - return v, nil - -} diff --git a/vendor/github.com/antchfx/htmlquery/go.mod b/vendor/github.com/antchfx/htmlquery/go.mod deleted file mode 100644 index 14169f5..0000000 --- a/vendor/github.com/antchfx/htmlquery/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/antchfx/htmlquery - -go 1.14 - -require ( - github.com/antchfx/xpath v1.1.6 - github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e - golang.org/x/net v0.0.0-20200421231249-e086a090c8fd -) diff --git a/vendor/github.com/antchfx/htmlquery/go.sum b/vendor/github.com/antchfx/htmlquery/go.sum deleted file mode 100644 index 2d6e03e..0000000 --- a/vendor/github.com/antchfx/htmlquery/go.sum +++ /dev/null @@ -1,11 +0,0 @@ -github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0= -github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/net v0.0.0-20200421231249-e086a090c8fd h1:QPwSajcTUrFriMF1nJ3XzgoqakqQEsnZf9LdXdi2nkI= -golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/vendor/github.com/antchfx/htmlquery/query.go b/vendor/github.com/antchfx/htmlquery/query.go deleted file mode 100644 index db75782..0000000 --- a/vendor/github.com/antchfx/htmlquery/query.go +++ /dev/null @@ -1,338 +0,0 @@ -/* -Package htmlquery provides extract data from HTML documents using XPath expression. -*/ -package htmlquery - -import ( - "bufio" - "bytes" - "fmt" - "io" - "net/http" - "os" - - "github.com/antchfx/xpath" - "golang.org/x/net/html" - "golang.org/x/net/html/charset" -) - -var _ xpath.NodeNavigator = &NodeNavigator{} - -// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node. -func CreateXPathNavigator(top *html.Node) *NodeNavigator { - return &NodeNavigator{curr: top, root: top, attr: -1} -} - -// Find is like QueryAll but Will panics if the expression `expr` cannot be parsed. -// -// See `QueryAll()` function. -func Find(top *html.Node, expr string) []*html.Node { - nodes, err := QueryAll(top, expr) - if err != nil { - panic(err) - } - return nodes -} - -// FindOne is like Query but will panics if the expression `expr` cannot be parsed. -// See `Query()` function. -func FindOne(top *html.Node, expr string) *html.Node { - node, err := Query(top, expr) - if err != nil { - panic(err) - } - return node -} - -// QueryAll searches the html.Node that matches by the specified XPath expr. -// Return an error if the expression `expr` cannot be parsed. -func QueryAll(top *html.Node, expr string) ([]*html.Node, error) { - exp, err := getQuery(expr) - if err != nil { - return nil, err - } - nodes := QuerySelectorAll(top, exp) - return nodes, nil -} - -// Query searches the html.Node that matches by the specified XPath expr, -// and return the first element of matched html.Node. -// -// Return an error if the expression `expr` cannot be parsed. -func Query(top *html.Node, expr string) (*html.Node, error) { - exp, err := getQuery(expr) - if err != nil { - return nil, err - } - return QuerySelector(top, exp), nil -} - -// QuerySelector returns the first matched html.Node by the specified XPath selector. -func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node { - t := selector.Select(CreateXPathNavigator(top)) - if t.MoveNext() { - return getCurrentNode(t.Current().(*NodeNavigator)) - } - return nil -} - -// QuerySelectorAll searches all of the html.Node that matches the specified XPath selectors. -func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node { - var elems []*html.Node - t := selector.Select(CreateXPathNavigator(top)) - for t.MoveNext() { - nav := t.Current().(*NodeNavigator) - n := getCurrentNode(nav) - // avoid adding duplicate nodes. - if len(elems) > 0 && (elems[0] == n || (nav.NodeType() == xpath.AttributeNode && - nav.LocalName() == elems[0].Data && nav.Value() == InnerText(elems[0]))) { - continue - } - elems = append(elems, n) - } - return elems -} - -// LoadURL loads the HTML document from the specified URL. -func LoadURL(url string) (*html.Node, error) { - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type")) - if err != nil { - return nil, err - } - return html.Parse(r) -} - -// LoadDoc loads the HTML document from the specified file path. -func LoadDoc(path string) (*html.Node, error) { - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - - return html.Parse(bufio.NewReader(f)) -} - -func getCurrentNode(n *NodeNavigator) *html.Node { - if n.NodeType() == xpath.AttributeNode { - childNode := &html.Node{ - Type: html.TextNode, - Data: n.Value(), - } - return &html.Node{ - Type: html.ElementNode, - Data: n.LocalName(), - FirstChild: childNode, - LastChild: childNode, - } - - } - return n.curr -} - -// Parse returns the parse tree for the HTML from the given Reader. -func Parse(r io.Reader) (*html.Node, error) { - return html.Parse(r) -} - -// InnerText returns the text between the start and end tags of the object. -func InnerText(n *html.Node) string { - var output func(*bytes.Buffer, *html.Node) - output = func(buf *bytes.Buffer, n *html.Node) { - switch n.Type { - case html.TextNode: - buf.WriteString(n.Data) - return - case html.CommentNode: - return - } - for child := n.FirstChild; child != nil; child = child.NextSibling { - output(buf, child) - } - } - - var buf bytes.Buffer - output(&buf, n) - return buf.String() -} - -// SelectAttr returns the attribute value with the specified name. -func SelectAttr(n *html.Node, name string) (val string) { - if n == nil { - return - } - if n.Type == html.ElementNode && n.Parent == nil && name == n.Data { - return InnerText(n) - } - for _, attr := range n.Attr { - if attr.Key == name { - val = attr.Val - break - } - } - return -} - -// OutputHTML returns the text including tags name. -func OutputHTML(n *html.Node, self bool) string { - var buf bytes.Buffer - if self { - html.Render(&buf, n) - } else { - for n := n.FirstChild; n != nil; n = n.NextSibling { - html.Render(&buf, n) - } - } - return buf.String() -} - -type NodeNavigator struct { - root, curr *html.Node - attr int -} - -func (h *NodeNavigator) Current() *html.Node { - return h.curr -} - -func (h *NodeNavigator) NodeType() xpath.NodeType { - switch h.curr.Type { - case html.CommentNode: - return xpath.CommentNode - case html.TextNode: - return xpath.TextNode - case html.DocumentNode: - return xpath.RootNode - case html.ElementNode: - if h.attr != -1 { - return xpath.AttributeNode - } - return xpath.ElementNode - case html.DoctypeNode: - // ignored declare and as Root-Node type. - return xpath.RootNode - } - panic(fmt.Sprintf("unknown HTML node type: %v", h.curr.Type)) -} - -func (h *NodeNavigator) LocalName() string { - if h.attr != -1 { - return h.curr.Attr[h.attr].Key - } - return h.curr.Data -} - -func (*NodeNavigator) Prefix() string { - return "" -} - -func (h *NodeNavigator) Value() string { - switch h.curr.Type { - case html.CommentNode: - return h.curr.Data - case html.ElementNode: - if h.attr != -1 { - return h.curr.Attr[h.attr].Val - } - return InnerText(h.curr) - case html.TextNode: - return h.curr.Data - } - return "" -} - -func (h *NodeNavigator) Copy() xpath.NodeNavigator { - n := *h - return &n -} - -func (h *NodeNavigator) MoveToRoot() { - h.curr = h.root -} - -func (h *NodeNavigator) MoveToParent() bool { - if h.attr != -1 { - h.attr = -1 - return true - } else if node := h.curr.Parent; node != nil { - h.curr = node - return true - } - return false -} - -func (h *NodeNavigator) MoveToNextAttribute() bool { - if h.attr >= len(h.curr.Attr)-1 { - return false - } - h.attr++ - return true -} - -func (h *NodeNavigator) MoveToChild() bool { - if h.attr != -1 { - return false - } - if node := h.curr.FirstChild; node != nil { - h.curr = node - return true - } - return false -} - -func (h *NodeNavigator) MoveToFirst() bool { - if h.attr != -1 || h.curr.PrevSibling == nil { - return false - } - for { - node := h.curr.PrevSibling - if node == nil { - break - } - h.curr = node - } - return true -} - -func (h *NodeNavigator) String() string { - return h.Value() -} - -func (h *NodeNavigator) MoveToNext() bool { - if h.attr != -1 { - return false - } - if node := h.curr.NextSibling; node != nil { - h.curr = node - return true - } - return false -} - -func (h *NodeNavigator) MoveToPrevious() bool { - if h.attr != -1 { - return false - } - if node := h.curr.PrevSibling; node != nil { - h.curr = node - return true - } - return false -} - -func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { - node, ok := other.(*NodeNavigator) - if !ok || node.root != h.root { - return false - } - - h.curr = node.curr - h.attr = node.attr - return true -} diff --git a/vendor/github.com/antchfx/xmlquery/.gitignore b/vendor/github.com/antchfx/xmlquery/.gitignore deleted file mode 100644 index 4d5d27b..0000000 --- a/vendor/github.com/antchfx/xmlquery/.gitignore +++ /dev/null @@ -1,32 +0,0 @@ -# vscode -.vscode -debug -*.test - -./build - -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test -*.prof \ No newline at end of file diff --git a/vendor/github.com/antchfx/xmlquery/.travis.yml b/vendor/github.com/antchfx/xmlquery/.travis.yml deleted file mode 100644 index 731b767..0000000 --- a/vendor/github.com/antchfx/xmlquery/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -language: go - -go: - - 1.9.x - - 1.12.x - - 1.13.x - - 1.14.x - - 1.15.x - -install: - - go get golang.org/x/net/html/charset - - go get github.com/antchfx/xpath - - go get github.com/mattn/goveralls - - go get github.com/golang/groupcache - -script: - - $HOME/gopath/bin/goveralls -service=travis-ci diff --git a/vendor/github.com/antchfx/xmlquery/BUILD.bazel b/vendor/github.com/antchfx/xmlquery/BUILD.bazel deleted file mode 100644 index d69f9ab..0000000 --- a/vendor/github.com/antchfx/xmlquery/BUILD.bazel +++ /dev/null @@ -1,21 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "xmlquery", - srcs = [ - "cache.go", - "cached_reader.go", - "node.go", - "options.go", - "parse.go", - "query.go", - ], - importmap = "peridot.resf.org/vendor/github.com/antchfx/xmlquery", - importpath = "github.com/antchfx/xmlquery", - visibility = ["//visibility:public"], - deps = [ - "//vendor/github.com/antchfx/xpath", - "//vendor/github.com/golang/groupcache/lru", - "@org_golang_x_net//html/charset", - ], -) diff --git a/vendor/github.com/antchfx/xmlquery/LICENSE b/vendor/github.com/antchfx/xmlquery/LICENSE deleted file mode 100644 index e14c371..0000000 --- a/vendor/github.com/antchfx/xmlquery/LICENSE +++ /dev/null @@ -1,17 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/vendor/github.com/antchfx/xmlquery/README.md b/vendor/github.com/antchfx/xmlquery/README.md deleted file mode 100644 index bae7fc3..0000000 --- a/vendor/github.com/antchfx/xmlquery/README.md +++ /dev/null @@ -1,262 +0,0 @@ -xmlquery -==== -[![Build Status](https://travis-ci.org/antchfx/xmlquery.svg?branch=master)](https://travis-ci.org/antchfx/xmlquery) -[![Coverage Status](https://coveralls.io/repos/github/antchfx/xmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xmlquery?branch=master) -[![GoDoc](https://godoc.org/github.com/antchfx/xmlquery?status.svg)](https://godoc.org/github.com/antchfx/xmlquery) -[![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xmlquery)](https://goreportcard.com/report/github.com/antchfx/xmlquery) - -Overview -=== - -`xmlquery` is an XPath query package for XML documents, allowing you to extract -data or evaluate from XML documents with an XPath expression. - -`xmlquery` has a built-in query object caching feature that caches recently used -XPATH query strings. Enabling caching can avoid recompile XPath expression for -each query. - -Change Logs -=== - -2020-08-?? -- Add XML stream loading and parsing support. - -2019-11-11 -- Add XPath query caching. - -2019-10-05 -- Add new methods compatible with invalid XPath expression error: `QueryAll` and `Query`. -- Add `QuerySelector` and `QuerySelectorAll` methods, support for reused query objects. -- PR [#12](https://github.com/antchfx/xmlquery/pull/12) (Thanks @FrancescoIlario) -- PR [#11](https://github.com/antchfx/xmlquery/pull/11) (Thanks @gjvnq) - -2018-12-23 -- Added XML output including comment nodes. [#9](https://github.com/antchfx/xmlquery/issues/9) - -2018-12-03 -- Added support to attribute name with namespace prefix and XML output. [#6](https://github.com/antchfx/xmlquery/issues/6) - -Installation -==== -``` - $ go get github.com/antchfx/xmlquery -``` - -Getting Started -=== - -### Find specified XPath query. - -```go -list, err := xmlquery.QueryAll(doc, "a") -if err != nil { - panic(err) -} -``` - -#### Parse an XML from URL. - -```go -doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml") -``` - -#### Parse an XML from string. - -```go -s := `` -doc, err := xmlquery.Parse(strings.NewReader(s)) -``` - -#### Parse an XML from io.Reader. - -```go -f, err := os.Open("../books.xml") -doc, err := xmlquery.Parse(f) -``` - -#### Parse an XML in a stream fashion (simple case without elements filtering). - -```go -f, err := os.Open("../books.xml") -p, err := xmlquery.CreateStreamParser(f, "/bookstore/book") -for { - n, err := p.Read() - if err == io.EOF { - break - } - if err != nil { - ... - } -} -``` - -#### Parse an XML in a stream fashion (simple case advanced element filtering). - -```go -f, err := os.Open("../books.xml") -p, err := xmlquery.CreateStreamParser(f, "/bookstore/book", "/bookstore/book[price>=10]") -for { - n, err := p.Read() - if err == io.EOF { - break - } - if err != nil { - ... - } -} -``` - -#### Find authors of all books in the bookstore. - -```go -list := xmlquery.Find(doc, "//book//author") -// or -list := xmlquery.Find(doc, "//author") -``` - -#### Find the second book. - -```go -book := xmlquery.FindOne(doc, "//book[2]") -``` - -#### Find all book elements and only get `id` attribute. (New Feature) - -```go -list := xmlquery.Find(doc,"//book/@id") -``` - -#### Find all books with id `bk104`. - -```go -list := xmlquery.Find(doc, "//book[@id='bk104']") -``` - -#### Find all books with price less than 5. - -```go -list := xmlquery.Find(doc, "//book[price<5]") -``` - -#### Evaluate total price of all books. - -```go -expr, err := xpath.Compile("sum(//book/price)") -price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) -fmt.Printf("total price: %f\n", price) -``` - -#### Evaluate number of all book elements. - -```go -expr, err := xpath.Compile("count(//book)") -price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) -``` - -FAQ -==== - -#### `Find()` vs `QueryAll()`, which is better? - -`Find` and `QueryAll` both do the same thing: searches all of matched XML nodes. -`Find` panics if provided with an invalid XPath query, while `QueryAll` returns -an error. - -#### Can I save my query expression object for the next query? - -Yes, you can. We provide `QuerySelector` and `QuerySelectorAll` methods; they -accept your query expression object. - -Caching a query expression object avoids recompiling the XPath query -expression, improving query performance. - -#### Create XML document. - -```go -doc := &xmlquery.Node{ - Type: xmlquery.DeclarationNode, - Data: "xml", - Attr: []xml.Attr{ - xml.Attr{Name: xml.Name{Local: "version"}, Value: "1.0"}, - }, -} -root := &xmlquery.Node{ - Data: "rss", - Type: xmlquery.ElementNode, -} -doc.FirstChild = root -channel := &xmlquery.Node{ - Data: "channel", - Type: xmlquery.ElementNode, -} -root.FirstChild = channel -title := &xmlquery.Node{ - Data: "title", - Type: xmlquery.ElementNode, -} -title_text := &xmlquery.Node{ - Data: "W3Schools Home Page", - Type: xmlquery.TextNode, -} -title.FirstChild = title_text -channel.FirstChild = title -fmt.Println(doc.OutputXML(true)) -// W3Schools Home Page -``` - -Quick Tutorial -=== - -```go -import ( - "github.com/antchfx/xmlquery" -) - -func main(){ - s := ` - - - W3Schools Home Page - https://www.w3schools.com - Free web building tutorials - - RSS Tutorial - https://www.w3schools.com/xml/xml_rss.asp - New RSS tutorial on W3Schools - - - XML Tutorial - https://www.w3schools.com/xml - New XML tutorial on W3Schools - - -` - - doc, err := xmlquery.Parse(strings.NewReader(s)) - if err != nil { - panic(err) - } - channel := xmlquery.FindOne(doc, "//channel") - if n := channel.SelectElement("title"); n != nil { - fmt.Printf("title: %s\n", n.InnerText()) - } - if n := channel.SelectElement("link"); n != nil { - fmt.Printf("link: %s\n", n.InnerText()) - } - for i, n := range xmlquery.Find(doc, "//item/title") { - fmt.Printf("#%d %s\n", i, n.InnerText()) - } -} -``` - -List of supported XPath query packages -=== -| Name | Description | -| ------------------------------------------------- | ----------------------------------------- | -| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for HTML documents | -| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for XML documents | -| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for JSON documents | - - Questions -=== -Please let me know if you have any questions diff --git a/vendor/github.com/antchfx/xmlquery/books.xml b/vendor/github.com/antchfx/xmlquery/books.xml deleted file mode 100644 index 85a74b5..0000000 --- a/vendor/github.com/antchfx/xmlquery/books.xml +++ /dev/null @@ -1,121 +0,0 @@ - - - - - Gambardella, Matthew - XML Developer's Guide - Computer - 44.95 - 2000-10-01 - An in-depth look at creating applications - with XML. - - - Ralls, Kim - Midnight Rain - Fantasy - 5.95 - 2000-12-16 - A former architect battles corporate zombies, - an evil sorceress, and her own childhood to become queen - of the world. - - - Corets, Eva - Maeve Ascendant - Fantasy - 5.95 - 2000-11-17 - After the collapse of a nanotechnology - society in England, the young survivors lay the - foundation for a new society. - - - Corets, Eva - Oberon's Legacy - Fantasy - 5.95 - 2001-03-10 - In post-apocalypse England, the mysterious - agent known only as Oberon helps to create a new life - for the inhabitants of London. Sequel to Maeve - Ascendant. - - - Corets, Eva - The Sundered Grail - Fantasy - 5.95 - 2001-09-10 - The two daughters of Maeve, half-sisters, - battle one another for control of England. Sequel to - Oberon's Legacy. - - - Randall, Cynthia - Lover Birds - Romance - 4.95 - 2000-09-02 - When Carla meets Paul at an ornithology - conference, tempers fly as feathers get ruffled. - - - Thurman, Paula - Splish Splash - Romance - 4.95 - 2000-11-02 - A deep sea diver finds true love twenty - thousand leagues beneath the sea. - - - Knorr, Stefan - Creepy Crawlies - Horror - 4.95 - 2000-12-06 - An anthology of horror stories about roaches, - centipedes, scorpions and other insects. - - - Kress, Peter - Paradox Lost - Science Fiction - 6.95 - 2000-11-02 - After an inadvertant trip through a Heisenberg - Uncertainty Device, James Salway discovers the problems - of being quantum. - - - O'Brien, Tim - Microsoft .NET: The Programming Bible - Computer - 36.95 - 2000-12-09 - Microsoft's .NET initiative is explored in - detail in this deep programmer's reference. - - - O'Brien, Tim - MSXML3: A Comprehensive Guide - Computer - 36.95 - 2000-12-01 - The Microsoft MSXML3 parser is covered in - detail, with attention to XML DOM interfaces, XSLT processing, - SAX and more. - - - Galos, Mike - Visual Studio 7: A Comprehensive Guide - Computer - 49.95 - 2001-04-16 - Microsoft Visual Studio 7 is explored in depth, - looking at how Visual Basic, Visual C++, C#, and ASP+ are - integrated into a comprehensive development - environment. - - \ No newline at end of file diff --git a/vendor/github.com/antchfx/xmlquery/cache.go b/vendor/github.com/antchfx/xmlquery/cache.go deleted file mode 100644 index 3abffcd..0000000 --- a/vendor/github.com/antchfx/xmlquery/cache.go +++ /dev/null @@ -1,43 +0,0 @@ -package xmlquery - -import ( - "sync" - - "github.com/golang/groupcache/lru" - - "github.com/antchfx/xpath" -) - -// DisableSelectorCache will disable caching for the query selector if value is true. -var DisableSelectorCache = false - -// SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50. -// Will disable caching if SelectorCacheMaxEntries <= 0. -var SelectorCacheMaxEntries = 50 - -var ( - cacheOnce sync.Once - cache *lru.Cache - cacheMutex sync.Mutex -) - -func getQuery(expr string) (*xpath.Expr, error) { - if DisableSelectorCache || SelectorCacheMaxEntries <= 0 { - return xpath.Compile(expr) - } - cacheOnce.Do(func() { - cache = lru.New(SelectorCacheMaxEntries) - }) - cacheMutex.Lock() - defer cacheMutex.Unlock() - if v, ok := cache.Get(expr); ok { - return v.(*xpath.Expr), nil - } - v, err := xpath.Compile(expr) - if err != nil { - return nil, err - } - cache.Add(expr, v) - return v, nil - -} diff --git a/vendor/github.com/antchfx/xmlquery/cached_reader.go b/vendor/github.com/antchfx/xmlquery/cached_reader.go deleted file mode 100644 index fe389c5..0000000 --- a/vendor/github.com/antchfx/xmlquery/cached_reader.go +++ /dev/null @@ -1,69 +0,0 @@ -package xmlquery - -import ( - "bufio" -) - -type cachedReader struct { - buffer *bufio.Reader - cache []byte - cacheCap int - cacheLen int - caching bool -} - -func newCachedReader(r *bufio.Reader) *cachedReader { - return &cachedReader{ - buffer: r, - cache: make([]byte, 4096), - cacheCap: 4096, - cacheLen: 0, - caching: false, - } -} - -func (c *cachedReader) StartCaching() { - c.cacheLen = 0 - c.caching = true -} - -func (c *cachedReader) ReadByte() (byte, error) { - if !c.caching { - return c.buffer.ReadByte() - } - b, err := c.buffer.ReadByte() - if err != nil { - return b, err - } - if c.cacheLen < c.cacheCap { - c.cache[c.cacheLen] = b - c.cacheLen++ - } - return b, err -} - -func (c *cachedReader) Cache() []byte { - return c.cache[:c.cacheLen] -} - -func (c *cachedReader) StopCaching() { - c.caching = false -} - -func (c *cachedReader) Read(p []byte) (int, error) { - n, err := c.buffer.Read(p) - if err != nil { - return n, err - } - if c.caching && c.cacheLen < c.cacheCap { - for i := 0; i < n; i++ { - c.cache[c.cacheLen] = p[i] - c.cacheLen++ - if c.cacheLen >= c.cacheCap { - break - } - } - } - return n, err -} - diff --git a/vendor/github.com/antchfx/xmlquery/go.mod b/vendor/github.com/antchfx/xmlquery/go.mod deleted file mode 100644 index b6f453e..0000000 --- a/vendor/github.com/antchfx/xmlquery/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/antchfx/xmlquery - -go 1.14 - -require ( - github.com/antchfx/xpath v1.1.10 - github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e - golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc -) diff --git a/vendor/github.com/antchfx/xmlquery/go.sum b/vendor/github.com/antchfx/xmlquery/go.sum deleted file mode 100644 index 9f54294..0000000 --- a/vendor/github.com/antchfx/xmlquery/go.sum +++ /dev/null @@ -1,14 +0,0 @@ -github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg= -github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc h1:zK/HqS5bZxDptfPJNq8v7vJfXtkU7r9TLIoSr1bXaP4= -golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/vendor/github.com/antchfx/xmlquery/node.go b/vendor/github.com/antchfx/xmlquery/node.go deleted file mode 100644 index 1a87384..0000000 --- a/vendor/github.com/antchfx/xmlquery/node.go +++ /dev/null @@ -1,232 +0,0 @@ -package xmlquery - -import ( - "bytes" - "encoding/xml" - "fmt" - "strings" -) - -// A NodeType is the type of a Node. -type NodeType uint - -const ( - // DocumentNode is a document object that, as the root of the document tree, - // provides access to the entire XML document. - DocumentNode NodeType = iota - // DeclarationNode is the document type declaration, indicated by the - // following tag (for example, ). - DeclarationNode - // ElementNode is an element (for example, ). - ElementNode - // TextNode is the text content of a node. - TextNode - // CharDataNode node - CharDataNode - // CommentNode a comment (for example, ). - CommentNode - // AttributeNode is an attribute of element. - AttributeNode -) - -type Attr struct { - Name xml.Name - Value string - NamespaceURI string -} - -// A Node consists of a NodeType and some Data (tag name for -// element nodes, content for text) and are part of a tree of Nodes. -type Node struct { - Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node - - Type NodeType - Data string - Prefix string - NamespaceURI string - Attr []Attr - - level int // node level in the tree -} - -// InnerText returns the text between the start and end tags of the object. -func (n *Node) InnerText() string { - var output func(*bytes.Buffer, *Node) - output = func(buf *bytes.Buffer, n *Node) { - switch n.Type { - case TextNode, CharDataNode: - buf.WriteString(n.Data) - case CommentNode: - default: - for child := n.FirstChild; child != nil; child = child.NextSibling { - output(buf, child) - } - } - } - - var buf bytes.Buffer - output(&buf, n) - return buf.String() -} - -func (n *Node) sanitizedData(preserveSpaces bool) string { - if preserveSpaces { - return strings.Trim(n.Data, "\n\t") - } - return strings.TrimSpace(n.Data) -} - -func calculatePreserveSpaces(n *Node, pastValue bool) bool { - if attr := n.SelectAttr("xml:space"); attr == "preserve" { - return true - } else if attr == "default" { - return false - } - return pastValue -} - -func outputXML(buf *bytes.Buffer, n *Node, preserveSpaces bool) { - preserveSpaces = calculatePreserveSpaces(n, preserveSpaces) - switch n.Type { - case TextNode: - xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces))) - return - case CharDataNode: - buf.WriteString("") - return - case CommentNode: - buf.WriteString("") - return - case DeclarationNode: - buf.WriteString("") - } else { - buf.WriteString(">") - } - for child := n.FirstChild; child != nil; child = child.NextSibling { - outputXML(buf, child, preserveSpaces) - } - if n.Type != DeclarationNode { - if n.Prefix == "" { - buf.WriteString(fmt.Sprintf("", n.Data)) - } else { - buf.WriteString(fmt.Sprintf("", n.Prefix, n.Data)) - } - } -} - -// OutputXML returns the text that including tags name. -func (n *Node) OutputXML(self bool) string { - var buf bytes.Buffer - if self { - outputXML(&buf, n, false) - } else { - for n := n.FirstChild; n != nil; n = n.NextSibling { - outputXML(&buf, n, false) - } - } - - return buf.String() -} - -// AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'. -func AddAttr(n *Node, key, val string) { - var attr Attr - if i := strings.Index(key, ":"); i > 0 { - attr = Attr{ - Name: xml.Name{Space: key[:i], Local: key[i+1:]}, - Value: val, - } - } else { - attr = Attr{ - Name: xml.Name{Local: key}, - Value: val, - } - } - - n.Attr = append(n.Attr, attr) -} - -// AddChild adds a new node 'n' to a node 'parent' as its last child. -func AddChild(parent, n *Node) { - n.Parent = parent - n.NextSibling = nil - if parent.FirstChild == nil { - parent.FirstChild = n - n.PrevSibling = nil - } else { - parent.LastChild.NextSibling = n - n.PrevSibling = parent.LastChild - } - - parent.LastChild = n -} - -// AddSibling adds a new node 'n' as a sibling of a given node 'sibling'. -// Note it is not necessarily true that the new node 'n' would be added -// immediately after 'sibling'. If 'sibling' isn't the last child of its -// parent, then the new node 'n' will be added at the end of the sibling -// chain of their parent. -func AddSibling(sibling, n *Node) { - for t := sibling.NextSibling; t != nil; t = t.NextSibling { - sibling = t - } - n.Parent = sibling.Parent - sibling.NextSibling = n - n.PrevSibling = sibling - n.NextSibling = nil - if sibling.Parent != nil { - sibling.Parent.LastChild = n - } -} - -// RemoveFromTree removes a node and its subtree from the document -// tree it is in. If the node is the root of the tree, then it's no-op. -func RemoveFromTree(n *Node) { - if n.Parent == nil { - return - } - if n.Parent.FirstChild == n { - if n.Parent.LastChild == n { - n.Parent.FirstChild = nil - n.Parent.LastChild = nil - } else { - n.Parent.FirstChild = n.NextSibling - n.NextSibling.PrevSibling = nil - } - } else { - if n.Parent.LastChild == n { - n.Parent.LastChild = n.PrevSibling - n.PrevSibling.NextSibling = nil - } else { - n.PrevSibling.NextSibling = n.NextSibling - n.NextSibling.PrevSibling = n.PrevSibling - } - } - n.Parent = nil - n.PrevSibling = nil - n.NextSibling = nil -} diff --git a/vendor/github.com/antchfx/xmlquery/options.go b/vendor/github.com/antchfx/xmlquery/options.go deleted file mode 100644 index f3e2f99..0000000 --- a/vendor/github.com/antchfx/xmlquery/options.go +++ /dev/null @@ -1,30 +0,0 @@ -package xmlquery - -import ( - "encoding/xml" -) - -type ParserOptions struct{ - Decoder *DecoderOptions -} - -func (options ParserOptions) apply(parser *parser) { - if options.Decoder != nil { - (*options.Decoder).apply(parser.decoder) - } -} - -// DecoderOptions implement the very same options than the standard -// encoding/xml package. Please refer to this documentation: -// https://golang.org/pkg/encoding/xml/#Decoder -type DecoderOptions struct{ - Strict bool - AutoClose []string - Entity map[string]string -} - -func (options DecoderOptions) apply(decoder *xml.Decoder) { - decoder.Strict = options.Strict - decoder.AutoClose = options.AutoClose - decoder.Entity = options.Entity -} diff --git a/vendor/github.com/antchfx/xmlquery/parse.go b/vendor/github.com/antchfx/xmlquery/parse.go deleted file mode 100644 index 810eb73..0000000 --- a/vendor/github.com/antchfx/xmlquery/parse.go +++ /dev/null @@ -1,365 +0,0 @@ -package xmlquery - -import ( - "bufio" - "encoding/xml" - "errors" - "fmt" - "io" - "net/http" - "regexp" - "strings" - - "github.com/antchfx/xpath" - "golang.org/x/net/html/charset" -) - -var xmlMIMERegex = regexp.MustCompile(`(?i)((application|image|message|model)/((\w|\.|-)+\+?)?|text/)(wb)?xml`) - -// LoadURL loads the XML document from the specified URL. -func LoadURL(url string) (*Node, error) { - resp, err := http.Get(url) - if err != nil { - return nil, err - } - defer resp.Body.Close() - // Make sure the Content-Type has a valid XML MIME type - if xmlMIMERegex.MatchString(resp.Header.Get("Content-Type")) { - return Parse(resp.Body) - } - return nil, fmt.Errorf("invalid XML document(%s)", resp.Header.Get("Content-Type")) -} - -// Parse returns the parse tree for the XML from the given Reader. -func Parse(r io.Reader) (*Node, error) { - return ParseWithOptions(r, ParserOptions{}) -} - -// ParseWithOptions is like parse, but with custom options -func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) { - p := createParser(r) - options.apply(p) - for { - _, err := p.parse() - if err == io.EOF { - return p.doc, nil - } - if err != nil { - return nil, err - } - } -} - -type parser struct { - decoder *xml.Decoder - doc *Node - space2prefix map[string]string - level int - prev *Node - streamElementXPath *xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s). - streamElementFilter *xpath.Expr // If specified, it provides further filtering on the target element. - streamNode *Node // Need to remember the last target node So we can clean it up upon next Read() call. - streamNodePrev *Node // Need to remember target node's prev so upon target node removal, we can restore correct prev. - reader *cachedReader // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA. -} - -func createParser(r io.Reader) *parser { - reader := newCachedReader(bufio.NewReader(r)) - p := &parser{ - decoder: xml.NewDecoder(reader), - doc: &Node{Type: DocumentNode}, - space2prefix: make(map[string]string), - level: 0, - reader: reader, - } - // http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml. - p.space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml" - p.decoder.CharsetReader = charset.NewReaderLabel - p.prev = p.doc - return p -} - -func (p *parser) parse() (*Node, error) { - var streamElementNodeCounter int - - for { - tok, err := p.decoder.Token() - if err != nil { - return nil, err - } - - switch tok := tok.(type) { - case xml.StartElement: - if p.level == 0 { - // mising XML declaration - node := &Node{Type: DeclarationNode, Data: "xml", level: 1} - AddChild(p.prev, node) - p.level = 1 - p.prev = node - } - // https://www.w3.org/TR/xml-names/#scoping-defaulting - for _, att := range tok.Attr { - if att.Name.Local == "xmlns" { - p.space2prefix[att.Value] = "" - } else if att.Name.Space == "xmlns" { - p.space2prefix[att.Value] = att.Name.Local - } - } - - if tok.Name.Space != "" { - if _, found := p.space2prefix[tok.Name.Space]; !found { - return nil, errors.New("xmlquery: invalid XML document, namespace is missing") - } - } - - attributes := make([]Attr, len(tok.Attr)) - for i, att := range tok.Attr { - name := att.Name - if prefix, ok := p.space2prefix[name.Space]; ok { - name.Space = prefix - } - attributes[i] = Attr{ - Name: name, - Value: att.Value, - NamespaceURI: att.Name.Space, - } - } - - node := &Node{ - Type: ElementNode, - Data: tok.Name.Local, - Prefix: p.space2prefix[tok.Name.Space], - NamespaceURI: tok.Name.Space, - Attr: attributes, - level: p.level, - } - - if p.level == p.prev.level { - AddSibling(p.prev, node) - } else if p.level > p.prev.level { - AddChild(p.prev, node) - } else if p.level < p.prev.level { - for i := p.prev.level - p.level; i > 1; i-- { - p.prev = p.prev.Parent - } - AddSibling(p.prev.Parent, node) - } - // If we're in the streaming mode, we need to remember the node if it is the target node - // so that when we finish processing the node's EndElement, we know how/what to return to - // caller. Also we need to remove the target node from the tree upon next Read() call so - // memory doesn't grow unbounded. - if p.streamElementXPath != nil { - if p.streamNode == nil { - if QuerySelector(p.doc, p.streamElementXPath) != nil { - p.streamNode = node - p.streamNodePrev = p.prev - streamElementNodeCounter = 1 - } - } else { - streamElementNodeCounter++ - } - } - p.prev = node - p.level++ - p.reader.StartCaching() - case xml.EndElement: - p.level-- - // If we're in streaming mode, and we already have a potential streaming - // target node identified (p.streamNode != nil) then we need to check if - // this is the real one we want to return to caller. - if p.streamNode != nil { - streamElementNodeCounter-- - if streamElementNodeCounter == 0 { - // Now we know this element node is the at least passing the initial - // p.streamElementXPath check and is a potential target node candidate. - // We need to have 1 more check with p.streamElementFilter (if given) to - // ensure it is really the element node we want. - // The reason we need a two-step check process is because the following - // situation: - // b1 - // And say the p.streamElementXPath = "/AAA/BBB[. != 'b1']". Now during - // xml.StartElement time, the node is still empty, so it will pass - // the p.streamElementXPath check. However, eventually we know this - // shouldn't be returned to the caller. Having a second more fine-grained - // filter check ensures that. So in this case, the caller should really - // setup the stream parser with: - // streamElementXPath = "/AAA/BBB[" - // streamElementFilter = "/AAA/BBB[. != 'b1']" - if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil { - return p.streamNode, nil - } - // otherwise, this isn't our target node, clean things up. - // note we also remove the underlying *Node from the node tree, to prevent - // future stream node candidate selection error. - RemoveFromTree(p.streamNode) - p.prev = p.streamNodePrev - p.streamNode = nil - p.streamNodePrev = nil - } - } - case xml.CharData: - p.reader.StopCaching() - // First, normalize the cache... - cached := strings.ToUpper(string(p.reader.Cache())) - nodeType := TextNode - if strings.HasPrefix(cached, " p.prev.level { - AddChild(p.prev, node) - } else if p.level < p.prev.level { - for i := p.prev.level - p.level; i > 1; i-- { - p.prev = p.prev.Parent - } - AddSibling(p.prev.Parent, node) - } - p.reader.StartCaching() - case xml.Comment: - node := &Node{Type: CommentNode, Data: string(tok), level: p.level} - if p.level == p.prev.level { - AddSibling(p.prev, node) - } else if p.level > p.prev.level { - AddChild(p.prev, node) - } else if p.level < p.prev.level { - for i := p.prev.level - p.level; i > 1; i-- { - p.prev = p.prev.Parent - } - AddSibling(p.prev.Parent, node) - } - case xml.ProcInst: // Processing Instruction - if p.prev.Type != DeclarationNode { - p.level++ - } - node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level} - pairs := strings.Split(string(tok.Inst), " ") - for _, pair := range pairs { - pair = strings.TrimSpace(pair) - if i := strings.Index(pair, "="); i > 0 { - AddAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`)) - } - } - if p.level == p.prev.level { - AddSibling(p.prev, node) - } else if p.level > p.prev.level { - AddChild(p.prev, node) - } - p.prev = node - case xml.Directive: - } - } -} - -// StreamParser enables loading and parsing an XML document in a streaming -// fashion. -type StreamParser struct { - p *parser -} - -// CreateStreamParser creates a StreamParser. Argument streamElementXPath is -// required. -// Argument streamElementFilter is optional and should only be used in advanced -// scenarios. -// -// Scenario 1: simple case: -// xml := `b1b2` -// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB") -// if err != nil { -// panic(err) -// } -// for { -// n, err := sp.Read() -// if err != nil { -// break -// } -// fmt.Println(n.OutputXML(true)) -// } -// Output will be: -// b1 -// b2 -// -// Scenario 2: advanced case: -// xml := `b1b2` -// sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB", "/AAA/BBB[. != 'b1']") -// if err != nil { -// panic(err) -// } -// for { -// n, err := sp.Read() -// if err != nil { -// break -// } -// fmt.Println(n.OutputXML(true)) -// } -// Output will be: -// b2 -// -// As the argument names indicate, streamElementXPath should be used for -// providing xpath query pointing to the target element node only, no extra -// filtering on the element itself or its children; while streamElementFilter, -// if needed, can provide additional filtering on the target element and its -// children. -// -// CreateStreamParser returns an error if either streamElementXPath or -// streamElementFilter, if provided, cannot be successfully parsed and compiled -// into a valid xpath query. -func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) { - return CreateStreamParserWithOptions(r, ParserOptions{}, streamElementXPath, streamElementFilter...) -} - -// CreateStreamParserWithOptions is like CreateStreamParser, but with custom options -func CreateStreamParserWithOptions( - r io.Reader, - options ParserOptions, - streamElementXPath string, - streamElementFilter ...string, -) (*StreamParser, error) { - elemXPath, err := getQuery(streamElementXPath) - if err != nil { - return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %s", streamElementXPath, err.Error()) - } - elemFilter := (*xpath.Expr)(nil) - if len(streamElementFilter) > 0 { - elemFilter, err = getQuery(streamElementFilter[0]) - if err != nil { - return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %s", streamElementFilter[0], err.Error()) - } - } - parser := createParser(r) - options.apply(parser) - sp := &StreamParser{ - p: parser, - } - sp.p.streamElementXPath = elemXPath - sp.p.streamElementFilter = elemFilter - return sp, nil -} - -// Read returns a target node that satisfies the XPath specified by caller at -// StreamParser creation time. If there is no more satisfying target nodes after -// reading the rest of the XML document, io.EOF will be returned. At any time, -// any XML parsing error encountered will be returned, and the stream parsing -// stopped. Calling Read() after an error is returned (including io.EOF) results -// undefined behavior. Also note, due to the streaming nature, calling Read() -// will automatically remove any previous target node(s) from the document tree. -func (sp *StreamParser) Read() (*Node, error) { - // Because this is a streaming read, we need to release/remove last - // target node from the node tree to free up memory. - if sp.p.streamNode != nil { - // We need to remove all siblings before the current stream node, - // because the document may contain unwanted nodes between the target - // ones (for example new line text node), which would otherwise - // accumulate as first childs, and slow down the stream over time - for sp.p.streamNode.PrevSibling != nil { - RemoveFromTree(sp.p.streamNode.PrevSibling) - } - sp.p.prev = sp.p.streamNode.Parent - RemoveFromTree(sp.p.streamNode) - sp.p.streamNode = nil - sp.p.streamNodePrev = nil - } - return sp.p.parse() -} diff --git a/vendor/github.com/antchfx/xmlquery/query.go b/vendor/github.com/antchfx/xmlquery/query.go deleted file mode 100644 index 106719b..0000000 --- a/vendor/github.com/antchfx/xmlquery/query.go +++ /dev/null @@ -1,309 +0,0 @@ -/* -Package xmlquery provides extract data from XML documents using XPath expression. -*/ -package xmlquery - -import ( - "fmt" - "strings" - - "github.com/antchfx/xpath" -) - -// SelectElements finds child elements with the specified name. -func (n *Node) SelectElements(name string) []*Node { - return Find(n, name) -} - -// SelectElement finds child elements with the specified name. -func (n *Node) SelectElement(name string) *Node { - return FindOne(n, name) -} - -// SelectAttr returns the attribute value with the specified name. -func (n *Node) SelectAttr(name string) string { - if n.Type == AttributeNode { - if n.Data == name { - return n.InnerText() - } - return "" - } - var local, space string - local = name - if i := strings.Index(name, ":"); i > 0 { - space = name[:i] - local = name[i+1:] - } - for _, attr := range n.Attr { - if attr.Name.Local == local && attr.Name.Space == space { - return attr.Value - } - } - return "" -} - -var _ xpath.NodeNavigator = &NodeNavigator{} - -// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified -// XML Node. -func CreateXPathNavigator(top *Node) *NodeNavigator { - return &NodeNavigator{curr: top, root: top, attr: -1} -} - -func getCurrentNode(it *xpath.NodeIterator) *Node { - n := it.Current().(*NodeNavigator) - if n.NodeType() == xpath.AttributeNode { - childNode := &Node{ - Type: TextNode, - Data: n.Value(), - } - return &Node{ - Parent: n.curr, - Type: AttributeNode, - Data: n.LocalName(), - FirstChild: childNode, - LastChild: childNode, - } - } - return n.curr -} - -// Find is like QueryAll but panics if `expr` is not a valid XPath expression. -// See `QueryAll()` function. -func Find(top *Node, expr string) []*Node { - nodes, err := QueryAll(top, expr) - if err != nil { - panic(err) - } - return nodes -} - -// FindOne is like Query but panics if `expr` is not a valid XPath expression. -// See `Query()` function. -func FindOne(top *Node, expr string) *Node { - node, err := Query(top, expr) - if err != nil { - panic(err) - } - return node -} - -// QueryAll searches the XML Node that matches by the specified XPath expr. -// Returns an error if the expression `expr` cannot be parsed. -func QueryAll(top *Node, expr string) ([]*Node, error) { - exp, err := getQuery(expr) - if err != nil { - return nil, err - } - return QuerySelectorAll(top, exp), nil -} - -// Query searches the XML Node that matches by the specified XPath expr, -// and returns first matched element. -func Query(top *Node, expr string) (*Node, error) { - exp, err := getQuery(expr) - if err != nil { - return nil, err - } - return QuerySelector(top, exp), nil -} - -// QuerySelectorAll searches all of the XML Node that matches the specified -// XPath selectors. -func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node { - t := selector.Select(CreateXPathNavigator(top)) - var elems []*Node - for t.MoveNext() { - elems = append(elems, getCurrentNode(t)) - } - return elems -} - -// QuerySelector returns the first matched XML Node by the specified XPath -// selector. -func QuerySelector(top *Node, selector *xpath.Expr) *Node { - t := selector.Select(CreateXPathNavigator(top)) - if t.MoveNext() { - return getCurrentNode(t) - } - return nil -} - -// FindEach searches the html.Node and calls functions cb. -// Important: this method is deprecated, instead, use for .. = range Find(){}. -func FindEach(top *Node, expr string, cb func(int, *Node)) { - for i, n := range Find(top, expr) { - cb(i, n) - } -} - -// FindEachWithBreak functions the same as FindEach but allows to break the loop -// by returning false from the callback function `cb`. -// Important: this method is deprecated, instead, use .. = range Find(){}. -func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) { - for i, n := range Find(top, expr) { - if !cb(i, n) { - break - } - } -} - -type NodeNavigator struct { - root, curr *Node - attr int -} - -func (x *NodeNavigator) Current() *Node { - return x.curr -} - -func (x *NodeNavigator) NodeType() xpath.NodeType { - switch x.curr.Type { - case CommentNode: - return xpath.CommentNode - case TextNode, CharDataNode: - return xpath.TextNode - case DeclarationNode, DocumentNode: - return xpath.RootNode - case ElementNode: - if x.attr != -1 { - return xpath.AttributeNode - } - return xpath.ElementNode - } - panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type)) -} - -func (x *NodeNavigator) LocalName() string { - if x.attr != -1 { - return x.curr.Attr[x.attr].Name.Local - } - return x.curr.Data - -} - -func (x *NodeNavigator) Prefix() string { - if x.NodeType() == xpath.AttributeNode { - if x.attr != -1 { - return x.curr.Attr[x.attr].Name.Space - } - return "" - } - return x.curr.Prefix -} - -func (x *NodeNavigator) NamespaceURL() string { - if x.attr != -1 { - return x.curr.Attr[x.attr].NamespaceURI - } - return x.curr.NamespaceURI -} - -func (x *NodeNavigator) Value() string { - switch x.curr.Type { - case CommentNode: - return x.curr.Data - case ElementNode: - if x.attr != -1 { - return x.curr.Attr[x.attr].Value - } - return x.curr.InnerText() - case TextNode: - return x.curr.Data - } - return "" -} - -func (x *NodeNavigator) Copy() xpath.NodeNavigator { - n := *x - return &n -} - -func (x *NodeNavigator) MoveToRoot() { - x.curr = x.root -} - -func (x *NodeNavigator) MoveToParent() bool { - if x.attr != -1 { - x.attr = -1 - return true - } else if node := x.curr.Parent; node != nil { - x.curr = node - return true - } - return false -} - -func (x *NodeNavigator) MoveToNextAttribute() bool { - if x.attr >= len(x.curr.Attr)-1 { - return false - } - x.attr++ - return true -} - -func (x *NodeNavigator) MoveToChild() bool { - if x.attr != -1 { - return false - } - if node := x.curr.FirstChild; node != nil { - x.curr = node - return true - } - return false -} - -func (x *NodeNavigator) MoveToFirst() bool { - if x.attr != -1 || x.curr.PrevSibling == nil { - return false - } - for { - node := x.curr.PrevSibling - if node == nil { - break - } - x.curr = node - } - return true -} - -func (x *NodeNavigator) String() string { - return x.Value() -} - -func (x *NodeNavigator) MoveToNext() bool { - if x.attr != -1 { - return false - } - for node := x.curr.NextSibling; node != nil; node = x.curr.NextSibling { - x.curr = node - if x.curr.Type != TextNode { - return true - } - } - return false -} - -func (x *NodeNavigator) MoveToPrevious() bool { - if x.attr != -1 { - return false - } - for node := x.curr.PrevSibling; node != nil; node = x.curr.PrevSibling { - x.curr = node - if x.curr.Type != TextNode { - return true - } - } - return false -} - -func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { - node, ok := other.(*NodeNavigator) - if !ok || node.root != x.root { - return false - } - - x.curr = node.curr - x.attr = node.attr - return true -} diff --git a/vendor/github.com/antchfx/xpath/.gitignore b/vendor/github.com/antchfx/xpath/.gitignore deleted file mode 100644 index 4d5d27b..0000000 --- a/vendor/github.com/antchfx/xpath/.gitignore +++ /dev/null @@ -1,32 +0,0 @@ -# vscode -.vscode -debug -*.test - -./build - -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe -*.test -*.prof \ No newline at end of file diff --git a/vendor/github.com/antchfx/xpath/.travis.yml b/vendor/github.com/antchfx/xpath/.travis.yml deleted file mode 100644 index 6b63957..0000000 --- a/vendor/github.com/antchfx/xpath/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: go - -go: - - 1.6 - - 1.9 - - '1.10' - -install: - - go get github.com/mattn/goveralls - -script: - - $HOME/gopath/bin/goveralls -service=travis-ci \ No newline at end of file diff --git a/vendor/github.com/antchfx/xpath/BUILD.bazel b/vendor/github.com/antchfx/xpath/BUILD.bazel deleted file mode 100644 index 37c8580..0000000 --- a/vendor/github.com/antchfx/xpath/BUILD.bazel +++ /dev/null @@ -1,18 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "xpath", - srcs = [ - "build.go", - "func.go", - "func_go110.go", - "func_pre_go110.go", - "operator.go", - "parse.go", - "query.go", - "xpath.go", - ], - importmap = "peridot.resf.org/vendor/github.com/antchfx/xpath", - importpath = "github.com/antchfx/xpath", - visibility = ["//visibility:public"], -) diff --git a/vendor/github.com/antchfx/xpath/LICENSE b/vendor/github.com/antchfx/xpath/LICENSE deleted file mode 100644 index e14c371..0000000 --- a/vendor/github.com/antchfx/xpath/LICENSE +++ /dev/null @@ -1,17 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/vendor/github.com/antchfx/xpath/README.md b/vendor/github.com/antchfx/xpath/README.md deleted file mode 100644 index 540285d..0000000 --- a/vendor/github.com/antchfx/xpath/README.md +++ /dev/null @@ -1,172 +0,0 @@ -XPath -==== -[![GoDoc](https://godoc.org/github.com/antchfx/xpath?status.svg)](https://godoc.org/github.com/antchfx/xpath) -[![Coverage Status](https://coveralls.io/repos/github/antchfx/xpath/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xpath?branch=master) -[![Build Status](https://travis-ci.org/antchfx/xpath.svg?branch=master)](https://travis-ci.org/antchfx/xpath) -[![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xpath)](https://goreportcard.com/report/github.com/antchfx/xpath) - -XPath is Go package provides selecting nodes from XML, HTML or other documents using XPath expression. - -Implementation -=== - -- [htmlquery](https://github.com/antchfx/htmlquery) - an XPath query package for HTML document - -- [xmlquery](https://github.com/antchfx/xmlquery) - an XPath query package for XML document. - -- [jsonquery](https://github.com/antchfx/jsonquery) - an XPath query package for JSON document - -Supported Features -=== - -#### The basic XPath patterns. - -> The basic XPath patterns cover 90% of the cases that most stylesheets will need. - -- `node` : Selects all child elements with nodeName of node. - -- `*` : Selects all child elements. - -- `@attr` : Selects the attribute attr. - -- `@*` : Selects all attributes. - -- `node()` : Matches an org.w3c.dom.Node. - -- `text()` : Matches a org.w3c.dom.Text node. - -- `comment()` : Matches a comment. - -- `.` : Selects the current node. - -- `..` : Selects the parent of current node. - -- `/` : Selects the document node. - -- `a[expr]` : Select only those nodes matching a which also satisfy the expression expr. - -- `a[n]` : Selects the nth matching node matching a When a filter's expression is a number, XPath selects based on position. - -- `a/b` : For each node matching a, add the nodes matching b to the result. - -- `a//b` : For each node matching a, add the descendant nodes matching b to the result. - -- `//b` : Returns elements in the entire document matching b. - -- `a|b` : All nodes matching a or b, union operation(not boolean or). - -- `(a, b, c)` : Evaluates each of its operands and concatenates the resulting sequences, in order, into a single result sequence - - -#### Node Axes - -- `child::*` : The child axis selects children of the current node. - -- `descendant::*` : The descendant axis selects descendants of the current node. It is equivalent to '//'. - -- `descendant-or-self::*` : Selects descendants including the current node. - -- `attribute::*` : Selects attributes of the current element. It is equivalent to @* - -- `following-sibling::*` : Selects nodes after the current node. - -- `preceding-sibling::*` : Selects nodes before the current node. - -- `following::*` : Selects the first matching node following in document order, excluding descendants. - -- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors. - -- `parent::*` : Selects the parent if it matches. The '..' pattern from the core is equivalent to 'parent::node()'. - -- `ancestor::*` : Selects matching ancestors. - -- `ancestor-or-self::*` : Selects ancestors including the current node. - -- `self::*` : Selects the current node. '.' is equivalent to 'self::node()'. - -#### Expressions - - The gxpath supported three types: number, boolean, string. - -- `path` : Selects nodes based on the path. - -- `a = b` : Standard comparisons. - - * a = b True if a equals b. - * a != b True if a is not equal to b. - * a < b True if a is less than b. - * a <= b True if a is less than or equal to b. - * a > b True if a is greater than b. - * a >= b True if a is greater than or equal to b. - -- `a + b` : Arithmetic expressions. - - * `- a` Unary minus - * a + b Add - * a - b Substract - * a * b Multiply - * a div b Divide - * a mod b Floating point mod, like Java. - -- `a or b` : Boolean `or` operation. - -- `a and b` : Boolean `and` operation. - -- `(expr)` : Parenthesized expressions. - -- `fun(arg1, ..., argn)` : Function calls: - -| Function | Supported | -| --- | --- | -`boolean()`| ✓ | -`ceiling()`| ✓ | -`choose()`| ✗ | -`concat()`| ✓ | -`contains()`| ✓ | -`count()`| ✓ | -`current()`| ✗ | -`document()`| ✗ | -`element-available()`| ✗ | -`ends-with()`| ✓ | -`false()`| ✓ | -`floor()`| ✓ | -`format-number()`| ✗ | -`function-available()`| ✗ | -`generate-id()`| ✗ | -`id()`| ✗ | -`key()`| ✗ | -`lang()`| ✗ | -`last()`| ✓ | -`local-name()`| ✓ | -`name()`| ✓ | -`namespace-uri()`| ✓ | -`normalize-space()`| ✓ | -`not()`| ✓ | -`number()`| ✓ | -`position()`| ✓ | -`replace()`| ✓ | -`reverse()`| ✓ | -`round()`| ✓ | -`starts-with()`| ✓ | -`string()`| ✓ | -`string-length()`| ✓ | -`substring()`| ✓ | -`substring-after()`| ✓ | -`substring-before()`| ✓ | -`sum()`| ✓ | -`system-property()`| ✗ | -`translate()`| ✓ | -`true()`| ✓ | -`unparsed-entity-url()` | ✗ | - -Changelogs -=== - -2019-03-19 -- optimize XPath `|` operation performance. [#33](https://github.com/antchfx/xpath/issues/33). Tips: suggest split into multiple subquery if you have a lot of `|` operations. - -2019-01-29 -- improvement `normalize-space` function. [#32](https://github.com/antchfx/xpath/issues/32) - -2018-12-07 -- supports XPath 2.0 Sequence expressions. [#30](https://github.com/antchfx/xpath/pull/30) by [@minherz](https://github.com/minherz). \ No newline at end of file diff --git a/vendor/github.com/antchfx/xpath/build.go b/vendor/github.com/antchfx/xpath/build.go deleted file mode 100644 index b7f850f..0000000 --- a/vendor/github.com/antchfx/xpath/build.go +++ /dev/null @@ -1,522 +0,0 @@ -package xpath - -import ( - "errors" - "fmt" -) - -type flag int - -const ( - noneFlag flag = iota - filterFlag -) - -// builder provides building an XPath expressions. -type builder struct { - depth int - flag flag - firstInput query -} - -// axisPredicate creates a predicate to predicating for this axis node. -func axisPredicate(root *axisNode) func(NodeNavigator) bool { - // get current axix node type. - typ := ElementNode - switch root.AxeType { - case "attribute": - typ = AttributeNode - case "self", "parent": - typ = allNode - default: - switch root.Prop { - case "comment": - typ = CommentNode - case "text": - typ = TextNode - // case "processing-instruction": - // typ = ProcessingInstructionNode - case "node": - typ = allNode - } - } - nametest := root.LocalName != "" || root.Prefix != "" - predicate := func(n NodeNavigator) bool { - if typ == n.NodeType() || typ == allNode || typ == TextNode { - if nametest { - if root.LocalName == n.LocalName() && root.Prefix == n.Prefix() { - return true - } - } else { - return true - } - } - return false - } - - return predicate -} - -// processAxisNode processes a query for the XPath axis node. -func (b *builder) processAxisNode(root *axisNode) (query, error) { - var ( - err error - qyInput query - qyOutput query - predicate = axisPredicate(root) - ) - - if root.Input == nil { - qyInput = &contextQuery{} - } else { - if root.AxeType == "child" && (root.Input.Type() == nodeAxis) { - if input := root.Input.(*axisNode); input.AxeType == "descendant-or-self" { - var qyGrandInput query - if input.Input != nil { - qyGrandInput, _ = b.processNode(input.Input) - } else { - qyGrandInput = &contextQuery{} - } - // fix #20: https://github.com/antchfx/htmlquery/issues/20 - filter := func(n NodeNavigator) bool { - v := predicate(n) - switch root.Prop { - case "text": - v = v && n.NodeType() == TextNode - case "comment": - v = v && n.NodeType() == CommentNode - } - return v - } - qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: filter, Self: true} - return qyOutput, nil - } - } - qyInput, err = b.processNode(root.Input) - if err != nil { - return nil, err - } - } - - switch root.AxeType { - case "ancestor": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate} - case "ancestor-or-self": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate, Self: true} - case "attribute": - qyOutput = &attributeQuery{Input: qyInput, Predicate: predicate} - case "child": - filter := func(n NodeNavigator) bool { - v := predicate(n) - switch root.Prop { - case "text": - v = v && n.NodeType() == TextNode - case "node": - v = v && (n.NodeType() == ElementNode || n.NodeType() == TextNode) - case "comment": - v = v && n.NodeType() == CommentNode - } - return v - } - qyOutput = &childQuery{Input: qyInput, Predicate: filter} - case "descendant": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate} - case "descendant-or-self": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate, Self: true} - case "following": - qyOutput = &followingQuery{Input: qyInput, Predicate: predicate} - case "following-sibling": - qyOutput = &followingQuery{Input: qyInput, Predicate: predicate, Sibling: true} - case "parent": - qyOutput = &parentQuery{Input: qyInput, Predicate: predicate} - case "preceding": - qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate} - case "preceding-sibling": - qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate, Sibling: true} - case "self": - qyOutput = &selfQuery{Input: qyInput, Predicate: predicate} - case "namespace": - // haha,what will you do someting?? - default: - err = fmt.Errorf("unknown axe type: %s", root.AxeType) - return nil, err - } - return qyOutput, nil -} - -// processFilterNode builds query for the XPath filter predicate. -func (b *builder) processFilterNode(root *filterNode) (query, error) { - b.flag |= filterFlag - - qyInput, err := b.processNode(root.Input) - if err != nil { - return nil, err - } - qyCond, err := b.processNode(root.Condition) - if err != nil { - return nil, err - } - qyOutput := &filterQuery{Input: qyInput, Predicate: qyCond} - return qyOutput, nil -} - -// processFunctionNode processes query for the XPath function node. -func (b *builder) processFunctionNode(root *functionNode) (query, error) { - var qyOutput query - switch root.FuncName { - case "starts-with": - arg1, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - arg2, err := b.processNode(root.Args[1]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: b.firstInput, Func: startwithFunc(arg1, arg2)} - case "ends-with": - arg1, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - arg2, err := b.processNode(root.Args[1]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: b.firstInput, Func: endwithFunc(arg1, arg2)} - case "contains": - arg1, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - arg2, err := b.processNode(root.Args[1]) - if err != nil { - return nil, err - } - - qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)} - case "substring": - //substring( string , start [, length] ) - if len(root.Args) < 2 { - return nil, errors.New("xpath: substring function must have at least two parameter") - } - var ( - arg1, arg2, arg3 query - err error - ) - if arg1, err = b.processNode(root.Args[0]); err != nil { - return nil, err - } - if arg2, err = b.processNode(root.Args[1]); err != nil { - return nil, err - } - if len(root.Args) == 3 { - if arg3, err = b.processNode(root.Args[2]); err != nil { - return nil, err - } - } - qyOutput = &functionQuery{Input: b.firstInput, Func: substringFunc(arg1, arg2, arg3)} - case "substring-before", "substring-after": - //substring-xxxx( haystack, needle ) - if len(root.Args) != 2 { - return nil, errors.New("xpath: substring-before function must have two parameters") - } - var ( - arg1, arg2 query - err error - ) - if arg1, err = b.processNode(root.Args[0]); err != nil { - return nil, err - } - if arg2, err = b.processNode(root.Args[1]); err != nil { - return nil, err - } - qyOutput = &functionQuery{ - Input: b.firstInput, - Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), - } - case "string-length": - // string-length( [string] ) - if len(root.Args) < 1 { - return nil, errors.New("xpath: string-length function must have at least one parameter") - } - arg1, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: b.firstInput, Func: stringLengthFunc(arg1)} - case "normalize-space": - if len(root.Args) == 0 { - return nil, errors.New("xpath: normalize-space function must have at least one parameter") - } - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: argQuery, Func: normalizespaceFunc} - case "replace": - //replace( string , string, string ) - if len(root.Args) != 3 { - return nil, errors.New("xpath: replace function must have three parameters") - } - var ( - arg1, arg2, arg3 query - err error - ) - if arg1, err = b.processNode(root.Args[0]); err != nil { - return nil, err - } - if arg2, err = b.processNode(root.Args[1]); err != nil { - return nil, err - } - if arg3, err = b.processNode(root.Args[2]); err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: b.firstInput, Func: replaceFunc(arg1, arg2, arg3)} - case "translate": - //translate( string , string, string ) - if len(root.Args) != 3 { - return nil, errors.New("xpath: translate function must have three parameters") - } - var ( - arg1, arg2, arg3 query - err error - ) - if arg1, err = b.processNode(root.Args[0]); err != nil { - return nil, err - } - if arg2, err = b.processNode(root.Args[1]); err != nil { - return nil, err - } - if arg3, err = b.processNode(root.Args[2]); err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: b.firstInput, Func: translateFunc(arg1, arg2, arg3)} - case "not": - if len(root.Args) == 0 { - return nil, errors.New("xpath: not function must have at least one parameter") - } - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: argQuery, Func: notFunc} - case "name", "local-name", "namespace-uri": - if len(root.Args) > 1 { - return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) - } - var ( - arg query - err error - ) - if len(root.Args) == 1 { - arg, err = b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - } - switch root.FuncName { - case "name": - qyOutput = &functionQuery{Input: b.firstInput, Func: nameFunc(arg)} - case "local-name": - qyOutput = &functionQuery{Input: b.firstInput, Func: localNameFunc(arg)} - case "namespace-uri": - qyOutput = &functionQuery{Input: b.firstInput, Func: namespaceFunc(arg)} - } - case "true", "false": - val := root.FuncName == "true" - qyOutput = &functionQuery{ - Input: b.firstInput, - Func: func(_ query, _ iterator) interface{} { - return val - }, - } - case "last": - qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc} - case "position": - qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc} - case "boolean", "number", "string": - inp := b.firstInput - if len(root.Args) > 1 { - return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) - } - if len(root.Args) == 1 { - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - inp = argQuery - } - f := &functionQuery{Input: inp} - switch root.FuncName { - case "boolean": - f.Func = booleanFunc - case "string": - f.Func = stringFunc - case "number": - f.Func = numberFunc - } - qyOutput = f - case "count": - //if b.firstInput == nil { - // return nil, errors.New("xpath: expression must evaluate to node-set") - //} - if len(root.Args) == 0 { - return nil, fmt.Errorf("xpath: count(node-sets) function must with have parameters node-sets") - } - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: argQuery, Func: countFunc} - case "sum": - if len(root.Args) == 0 { - return nil, fmt.Errorf("xpath: sum(node-sets) function must with have parameters node-sets") - } - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - qyOutput = &functionQuery{Input: argQuery, Func: sumFunc} - case "ceiling", "floor", "round": - if len(root.Args) == 0 { - return nil, fmt.Errorf("xpath: ceiling(node-sets) function must with have parameters node-sets") - } - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - f := &functionQuery{Input: argQuery} - switch root.FuncName { - case "ceiling": - f.Func = ceilingFunc - case "floor": - f.Func = floorFunc - case "round": - f.Func = roundFunc - } - qyOutput = f - case "concat": - if len(root.Args) < 2 { - return nil, fmt.Errorf("xpath: concat() must have at least two arguments") - } - var args []query - for _, v := range root.Args { - q, err := b.processNode(v) - if err != nil { - return nil, err - } - args = append(args, q) - } - qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)} - case "reverse": - if len(root.Args) == 0 { - return nil, fmt.Errorf("xpath: reverse(node-sets) function must with have parameters node-sets") - } - argQuery, err := b.processNode(root.Args[0]) - if err != nil { - return nil, err - } - qyOutput = &transformFunctionQuery{Input: argQuery, Func: reverseFunc} - default: - return nil, fmt.Errorf("not yet support this function %s()", root.FuncName) - } - return qyOutput, nil -} - -func (b *builder) processOperatorNode(root *operatorNode) (query, error) { - left, err := b.processNode(root.Left) - if err != nil { - return nil, err - } - right, err := b.processNode(root.Right) - if err != nil { - return nil, err - } - var qyOutput query - switch root.Op { - case "+", "-", "div", "mod": // Numeric operator - var exprFunc func(interface{}, interface{}) interface{} - switch root.Op { - case "+": - exprFunc = plusFunc - case "-": - exprFunc = minusFunc - case "div": - exprFunc = divFunc - case "mod": - exprFunc = modFunc - } - qyOutput = &numericQuery{Left: left, Right: right, Do: exprFunc} - case "=", ">", ">=", "<", "<=", "!=": - var exprFunc func(iterator, interface{}, interface{}) interface{} - switch root.Op { - case "=": - exprFunc = eqFunc - case ">": - exprFunc = gtFunc - case ">=": - exprFunc = geFunc - case "<": - exprFunc = ltFunc - case "<=": - exprFunc = leFunc - case "!=": - exprFunc = neFunc - } - qyOutput = &logicalQuery{Left: left, Right: right, Do: exprFunc} - case "or", "and": - isOr := false - if root.Op == "or" { - isOr = true - } - qyOutput = &booleanQuery{Left: left, Right: right, IsOr: isOr} - case "|": - qyOutput = &unionQuery{Left: left, Right: right} - } - return qyOutput, nil -} - -func (b *builder) processNode(root node) (q query, err error) { - if b.depth = b.depth + 1; b.depth > 1024 { - err = errors.New("the xpath expressions is too complex") - return - } - - switch root.Type() { - case nodeConstantOperand: - n := root.(*operandNode) - q = &constantQuery{Val: n.Val} - case nodeRoot: - q = &contextQuery{Root: true} - case nodeAxis: - q, err = b.processAxisNode(root.(*axisNode)) - b.firstInput = q - case nodeFilter: - q, err = b.processFilterNode(root.(*filterNode)) - case nodeFunction: - q, err = b.processFunctionNode(root.(*functionNode)) - case nodeOperator: - q, err = b.processOperatorNode(root.(*operatorNode)) - } - return -} - -// build builds a specified XPath expressions expr. -func build(expr string) (q query, err error) { - defer func() { - if e := recover(); e != nil { - switch x := e.(type) { - case string: - err = errors.New(x) - case error: - err = x - default: - err = errors.New("unknown panic") - } - } - }() - root := parse(expr) - b := &builder{} - return b.processNode(root) -} diff --git a/vendor/github.com/antchfx/xpath/func.go b/vendor/github.com/antchfx/xpath/func.go deleted file mode 100644 index bcfee55..0000000 --- a/vendor/github.com/antchfx/xpath/func.go +++ /dev/null @@ -1,585 +0,0 @@ -package xpath - -import ( - "errors" - "fmt" - "math" - "strconv" - "strings" - "sync" - "unicode" -) - -// Defined an interface of stringBuilder that compatible with -// strings.Builder(go 1.10) and bytes.Buffer(< go 1.10) -type stringBuilder interface { - WriteRune(r rune) (n int, err error) - WriteString(s string) (int, error) - Reset() - Grow(n int) - String() string -} - -var builderPool = sync.Pool{New: func() interface{} { - return newStringBuilder() -}} - -// The XPath function list. - -func predicate(q query) func(NodeNavigator) bool { - type Predicater interface { - Test(NodeNavigator) bool - } - if p, ok := q.(Predicater); ok { - return p.Test - } - return func(NodeNavigator) bool { return true } -} - -// positionFunc is a XPath Node Set functions position(). -func positionFunc(q query, t iterator) interface{} { - var ( - count = 1 - node = t.Current().Copy() - ) - test := predicate(q) - for node.MoveToPrevious() { - if test(node) { - count++ - } - } - return float64(count) -} - -// lastFunc is a XPath Node Set functions last(). -func lastFunc(q query, t iterator) interface{} { - var ( - count = 0 - node = t.Current().Copy() - ) - node.MoveToFirst() - test := predicate(q) - for { - if test(node) { - count++ - } - if !node.MoveToNext() { - break - } - } - return float64(count) -} - -// countFunc is a XPath Node Set functions count(node-set). -func countFunc(q query, t iterator) interface{} { - var count = 0 - q = functionArgs(q) - test := predicate(q) - switch typ := q.Evaluate(t).(type) { - case query: - for node := typ.Select(t); node != nil; node = typ.Select(t) { - if test(node) { - count++ - } - } - } - return float64(count) -} - -// sumFunc is a XPath Node Set functions sum(node-set). -func sumFunc(q query, t iterator) interface{} { - var sum float64 - switch typ := functionArgs(q).Evaluate(t).(type) { - case query: - for node := typ.Select(t); node != nil; node = typ.Select(t) { - if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { - sum += v - } - } - case float64: - sum = typ - case string: - v, err := strconv.ParseFloat(typ, 64) - if err != nil { - panic(errors.New("sum() function argument type must be a node-set or number")) - } - sum = v - } - return sum -} - -func asNumber(t iterator, o interface{}) float64 { - switch typ := o.(type) { - case query: - node := typ.Select(t) - if node == nil { - return float64(0) - } - if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { - return v - } - case float64: - return typ - case string: - v, err := strconv.ParseFloat(typ, 64) - if err != nil { - panic(errors.New("ceiling() function argument type must be a node-set or number")) - } - return v - } - return 0 -} - -// ceilingFunc is a XPath Node Set functions ceiling(node-set). -func ceilingFunc(q query, t iterator) interface{} { - val := asNumber(t, functionArgs(q).Evaluate(t)) - return math.Ceil(val) -} - -// floorFunc is a XPath Node Set functions floor(node-set). -func floorFunc(q query, t iterator) interface{} { - val := asNumber(t, functionArgs(q).Evaluate(t)) - return math.Floor(val) -} - -// roundFunc is a XPath Node Set functions round(node-set). -func roundFunc(q query, t iterator) interface{} { - val := asNumber(t, functionArgs(q).Evaluate(t)) - //return math.Round(val) - return round(val) -} - -// nameFunc is a XPath functions name([node-set]). -func nameFunc(arg query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var v NodeNavigator - if arg == nil { - v = t.Current() - } else { - v = arg.Select(t) - if v == nil { - return "" - } - } - ns := v.Prefix() - if ns == "" { - return v.LocalName() - } - return ns + ":" + v.LocalName() - } -} - -// localNameFunc is a XPath functions local-name([node-set]). -func localNameFunc(arg query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var v NodeNavigator - if arg == nil { - v = t.Current() - } else { - v = arg.Select(t) - if v == nil { - return "" - } - } - return v.LocalName() - } -} - -// namespaceFunc is a XPath functions namespace-uri([node-set]). -func namespaceFunc(arg query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var v NodeNavigator - if arg == nil { - v = t.Current() - } else { - // Get the first node in the node-set if specified. - v = arg.Select(t) - if v == nil { - return "" - } - } - // fix about namespace-uri() bug: https://github.com/antchfx/xmlquery/issues/22 - // TODO: In the next version, add NamespaceURL() to the NodeNavigator interface. - type namespaceURL interface { - NamespaceURL() string - } - if f, ok := v.(namespaceURL); ok { - return f.NamespaceURL() - } - return v.Prefix() - } -} - -func asBool(t iterator, v interface{}) bool { - switch v := v.(type) { - case nil: - return false - case *NodeIterator: - return v.MoveNext() - case bool: - return v - case float64: - return v != 0 - case string: - return v != "" - case query: - return v.Select(t) != nil - default: - panic(fmt.Errorf("unexpected type: %T", v)) - } -} - -func asString(t iterator, v interface{}) string { - switch v := v.(type) { - case nil: - return "" - case bool: - if v { - return "true" - } - return "false" - case float64: - return strconv.FormatFloat(v, 'g', -1, 64) - case string: - return v - case query: - node := v.Select(t) - if node == nil { - return "" - } - return node.Value() - default: - panic(fmt.Errorf("unexpected type: %T", v)) - } -} - -// booleanFunc is a XPath functions boolean([node-set]). -func booleanFunc(q query, t iterator) interface{} { - v := functionArgs(q).Evaluate(t) - return asBool(t, v) -} - -// numberFunc is a XPath functions number([node-set]). -func numberFunc(q query, t iterator) interface{} { - v := functionArgs(q).Evaluate(t) - return asNumber(t, v) -} - -// stringFunc is a XPath functions string([node-set]). -func stringFunc(q query, t iterator) interface{} { - v := functionArgs(q).Evaluate(t) - return asString(t, v) -} - -// startwithFunc is a XPath functions starts-with(string, string). -func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var ( - m, n string - ok bool - ) - switch typ := functionArgs(arg1).Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return false - } - m = node.Value() - default: - panic(errors.New("starts-with() function argument type must be string")) - } - n, ok = functionArgs(arg2).Evaluate(t).(string) - if !ok { - panic(errors.New("starts-with() function argument type must be string")) - } - return strings.HasPrefix(m, n) - } -} - -// endwithFunc is a XPath functions ends-with(string, string). -func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var ( - m, n string - ok bool - ) - switch typ := functionArgs(arg1).Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return false - } - m = node.Value() - default: - panic(errors.New("ends-with() function argument type must be string")) - } - n, ok = functionArgs(arg2).Evaluate(t).(string) - if !ok { - panic(errors.New("ends-with() function argument type must be string")) - } - return strings.HasSuffix(m, n) - } -} - -// containsFunc is a XPath functions contains(string or @attr, string). -func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var ( - m, n string - ok bool - ) - switch typ := functionArgs(arg1).Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return false - } - m = node.Value() - default: - panic(errors.New("contains() function argument type must be string")) - } - - n, ok = functionArgs(arg2).Evaluate(t).(string) - if !ok { - panic(errors.New("contains() function argument type must be string")) - } - - return strings.Contains(m, n) - } -} - -// normalizespaceFunc is XPath functions normalize-space(string?) -func normalizespaceFunc(q query, t iterator) interface{} { - var m string - switch typ := functionArgs(q).Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return "" - } - m = node.Value() - } - var b = builderPool.Get().(stringBuilder) - b.Grow(len(m)) - - runeStr := []rune(strings.TrimSpace(m)) - l := len(runeStr) - for i := range runeStr { - r := runeStr[i] - isSpace := unicode.IsSpace(r) - if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) { - if isSpace { - r = ' ' - } - b.WriteRune(r) - } - } - result := b.String() - b.Reset() - builderPool.Put(b) - - return result -} - -// substringFunc is XPath functions substring function returns a part of a given string. -func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var m string - switch typ := functionArgs(arg1).Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return "" - } - m = node.Value() - } - - var start, length float64 - var ok bool - - if start, ok = functionArgs(arg2).Evaluate(t).(float64); !ok { - panic(errors.New("substring() function first argument type must be int")) - } else if start < 1 { - panic(errors.New("substring() function first argument type must be >= 1")) - } - start-- - if arg3 != nil { - if length, ok = functionArgs(arg3).Evaluate(t).(float64); !ok { - panic(errors.New("substring() function second argument type must be int")) - } - } - if (len(m) - int(start)) < int(length) { - panic(errors.New("substring() function start and length argument out of range")) - } - if length > 0 { - return m[int(start):int(length+start)] - } - return m[int(start):] - } -} - -// substringIndFunc is XPath functions substring-before/substring-after function returns a part of a given string. -func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - var str string - switch v := functionArgs(arg1).Evaluate(t).(type) { - case string: - str = v - case query: - node := v.Select(t) - if node == nil { - return "" - } - str = node.Value() - } - var word string - switch v := functionArgs(arg2).Evaluate(t).(type) { - case string: - word = v - case query: - node := v.Select(t) - if node == nil { - return "" - } - word = node.Value() - } - if word == "" { - return "" - } - - i := strings.Index(str, word) - if i < 0 { - return "" - } - if after { - return str[i+len(word):] - } - return str[:i] - } -} - -// stringLengthFunc is XPATH string-length( [string] ) function that returns a number -// equal to the number of characters in a given string. -func stringLengthFunc(arg1 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - switch v := functionArgs(arg1).Evaluate(t).(type) { - case string: - return float64(len(v)) - case query: - node := v.Select(t) - if node == nil { - break - } - return float64(len(node.Value())) - } - return float64(0) - } -} - -// translateFunc is XPath functions translate() function returns a replaced string. -func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - str := asString(t, functionArgs(arg1).Evaluate(t)) - src := asString(t, functionArgs(arg2).Evaluate(t)) - dst := asString(t, functionArgs(arg3).Evaluate(t)) - - replace := make([]string, 0, len(src)) - for i, s := range src { - d := "" - if i < len(dst) { - d = string(dst[i]) - } - replace = append(replace, string(s), d) - } - return strings.NewReplacer(replace...).Replace(str) - } -} - -// replaceFunc is XPath functions replace() function returns a replaced string. -func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - str := asString(t, functionArgs(arg1).Evaluate(t)) - src := asString(t, functionArgs(arg2).Evaluate(t)) - dst := asString(t, functionArgs(arg3).Evaluate(t)) - - return strings.Replace(str, src, dst, -1) - } -} - -// notFunc is XPATH functions not(expression) function operation. -func notFunc(q query, t iterator) interface{} { - switch v := functionArgs(q).Evaluate(t).(type) { - case bool: - return !v - case query: - node := v.Select(t) - return node == nil - default: - return false - } -} - -// concatFunc is the concat function concatenates two or more -// strings and returns the resulting string. -// concat( string1 , string2 [, stringn]* ) -func concatFunc(args ...query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { - b := builderPool.Get().(stringBuilder) - for _, v := range args { - v = functionArgs(v) - - switch v := v.Evaluate(t).(type) { - case string: - b.WriteString(v) - case query: - node := v.Select(t) - if node != nil { - b.WriteString(node.Value()) - } - } - } - result := b.String() - b.Reset() - builderPool.Put(b) - - return result - } -} - -// https://github.com/antchfx/xpath/issues/43 -func functionArgs(q query) query { - if _, ok := q.(*functionQuery); ok { - return q - } - return q.Clone() -} - -func reverseFunc(q query, t iterator) func() NodeNavigator { - var list []NodeNavigator - for { - node := q.Select(t) - if node == nil { - break - } - list = append(list, node.Copy()) - } - i := len(list) - return func() NodeNavigator { - if i <= 0 { - return nil - } - i-- - node := list[i] - return node - } -} diff --git a/vendor/github.com/antchfx/xpath/func_go110.go b/vendor/github.com/antchfx/xpath/func_go110.go deleted file mode 100644 index 6df30d3..0000000 --- a/vendor/github.com/antchfx/xpath/func_go110.go +++ /dev/null @@ -1,16 +0,0 @@ -// +build go1.10 - -package xpath - -import ( - "math" - "strings" -) - -func round(f float64) int { - return int(math.Round(f)) -} - -func newStringBuilder() stringBuilder{ - return &strings.Builder{} -} diff --git a/vendor/github.com/antchfx/xpath/func_pre_go110.go b/vendor/github.com/antchfx/xpath/func_pre_go110.go deleted file mode 100644 index 335141f..0000000 --- a/vendor/github.com/antchfx/xpath/func_pre_go110.go +++ /dev/null @@ -1,22 +0,0 @@ -// +build !go1.10 - -package xpath - -import ( - "bytes" - "math" -) - -// math.Round() is supported by Go 1.10+, -// This method just compatible for version <1.10. -// https://github.com/golang/go/issues/20100 -func round(f float64) int { - if math.Abs(f) < 0.5 { - return 0 - } - return int(f + math.Copysign(0.5, f)) -} - -func newStringBuilder() stringBuilder { - return &bytes.Buffer{} -} diff --git a/vendor/github.com/antchfx/xpath/operator.go b/vendor/github.com/antchfx/xpath/operator.go deleted file mode 100644 index 8c2f31f..0000000 --- a/vendor/github.com/antchfx/xpath/operator.go +++ /dev/null @@ -1,305 +0,0 @@ -package xpath - -import ( - "fmt" - "reflect" - "strconv" -) - -// The XPath number operator function list. - -// valueType is a return value type. -type valueType int - -const ( - booleanType valueType = iota - numberType - stringType - nodeSetType -) - -func getValueType(i interface{}) valueType { - v := reflect.ValueOf(i) - switch v.Kind() { - case reflect.Float64: - return numberType - case reflect.String: - return stringType - case reflect.Bool: - return booleanType - default: - if _, ok := i.(query); ok { - return nodeSetType - } - } - panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) -} - -type logical func(iterator, string, interface{}, interface{}) bool - -var logicalFuncs = [][]logical{ - {cmpBooleanBoolean, nil, nil, nil}, - {nil, cmpNumericNumeric, cmpNumericString, cmpNumericNodeSet}, - {nil, cmpStringNumeric, cmpStringString, cmpStringNodeSet}, - {nil, cmpNodeSetNumeric, cmpNodeSetString, cmpNodeSetNodeSet}, -} - -// number vs number -func cmpNumberNumberF(op string, a, b float64) bool { - switch op { - case "=": - return a == b - case ">": - return a > b - case "<": - return a < b - case ">=": - return a >= b - case "<=": - return a <= b - case "!=": - return a != b - } - return false -} - -// string vs string -func cmpStringStringF(op string, a, b string) bool { - switch op { - case "=": - return a == b - case ">": - return a > b - case "<": - return a < b - case ">=": - return a >= b - case "<=": - return a <= b - case "!=": - return a != b - } - return false -} - -func cmpBooleanBooleanF(op string, a, b bool) bool { - switch op { - case "or": - return a || b - case "and": - return a && b - } - return false -} - -func cmpNumericNumeric(t iterator, op string, m, n interface{}) bool { - a := m.(float64) - b := n.(float64) - return cmpNumberNumberF(op, a, b) -} - -func cmpNumericString(t iterator, op string, m, n interface{}) bool { - a := m.(float64) - b := n.(string) - num, err := strconv.ParseFloat(b, 64) - if err != nil { - panic(err) - } - return cmpNumberNumberF(op, a, num) -} - -func cmpNumericNodeSet(t iterator, op string, m, n interface{}) bool { - a := m.(float64) - b := n.(query) - - for { - node := b.Select(t) - if node == nil { - break - } - num, err := strconv.ParseFloat(node.Value(), 64) - if err != nil { - panic(err) - } - if cmpNumberNumberF(op, a, num) { - return true - } - } - return false -} - -func cmpNodeSetNumeric(t iterator, op string, m, n interface{}) bool { - a := m.(query) - b := n.(float64) - for { - node := a.Select(t) - if node == nil { - break - } - num, err := strconv.ParseFloat(node.Value(), 64) - if err != nil { - panic(err) - } - if cmpNumberNumberF(op, num, b) { - return true - } - } - return false -} - -func cmpNodeSetString(t iterator, op string, m, n interface{}) bool { - a := m.(query) - b := n.(string) - for { - node := a.Select(t) - if node == nil { - break - } - if cmpStringStringF(op, b, node.Value()) { - return true - } - } - return false -} - -func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool { - a := m.(query) - b := n.(query) - x := a.Select(t) - if x == nil { - return false - } - y := b.Select(t) - if y == nil { - return false - } - return cmpStringStringF(op, x.Value(), y.Value()) -} - -func cmpStringNumeric(t iterator, op string, m, n interface{}) bool { - a := m.(string) - b := n.(float64) - num, err := strconv.ParseFloat(a, 64) - if err != nil { - panic(err) - } - return cmpNumberNumberF(op, b, num) -} - -func cmpStringString(t iterator, op string, m, n interface{}) bool { - a := m.(string) - b := n.(string) - return cmpStringStringF(op, a, b) -} - -func cmpStringNodeSet(t iterator, op string, m, n interface{}) bool { - a := m.(string) - b := n.(query) - for { - node := b.Select(t) - if node == nil { - break - } - if cmpStringStringF(op, a, node.Value()) { - return true - } - } - return false -} - -func cmpBooleanBoolean(t iterator, op string, m, n interface{}) bool { - a := m.(bool) - b := n.(bool) - return cmpBooleanBooleanF(op, a, b) -} - -// eqFunc is an `=` operator. -func eqFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, "=", m, n) -} - -// gtFunc is an `>` operator. -func gtFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, ">", m, n) -} - -// geFunc is an `>=` operator. -func geFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, ">=", m, n) -} - -// ltFunc is an `<` operator. -func ltFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, "<", m, n) -} - -// leFunc is an `<=` operator. -func leFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, "<=", m, n) -} - -// neFunc is an `!=` operator. -func neFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, "!=", m, n) -} - -// orFunc is an `or` operator. -var orFunc = func(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) - return logicalFuncs[t1][t2](t, "or", m, n) -} - -func numericExpr(m, n interface{}, cb func(float64, float64) float64) float64 { - typ := reflect.TypeOf(float64(0)) - a := reflect.ValueOf(m).Convert(typ) - b := reflect.ValueOf(n).Convert(typ) - return cb(a.Float(), b.Float()) -} - -// plusFunc is an `+` operator. -var plusFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { - return a + b - }) -} - -// minusFunc is an `-` operator. -var minusFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { - return a - b - }) -} - -// mulFunc is an `*` operator. -var mulFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { - return a * b - }) -} - -// divFunc is an `DIV` operator. -var divFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { - return a / b - }) -} - -// modFunc is an 'MOD' operator. -var modFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { - return float64(int(a) % int(b)) - }) -} diff --git a/vendor/github.com/antchfx/xpath/parse.go b/vendor/github.com/antchfx/xpath/parse.go deleted file mode 100644 index fb9abe3..0000000 --- a/vendor/github.com/antchfx/xpath/parse.go +++ /dev/null @@ -1,1186 +0,0 @@ -package xpath - -import ( - "bytes" - "errors" - "fmt" - "strconv" - "unicode" -) - -// A XPath expression token type. -type itemType int - -const ( - itemComma itemType = iota // ',' - itemSlash // '/' - itemAt // '@' - itemDot // '.' - itemLParens // '(' - itemRParens // ')' - itemLBracket // '[' - itemRBracket // ']' - itemStar // '*' - itemPlus // '+' - itemMinus // '-' - itemEq // '=' - itemLt // '<' - itemGt // '>' - itemBang // '!' - itemDollar // '$' - itemApos // '\'' - itemQuote // '"' - itemUnion // '|' - itemNe // '!=' - itemLe // '<=' - itemGe // '>=' - itemAnd // '&&' - itemOr // '||' - itemDotDot // '..' - itemSlashSlash // '//' - itemName // XML Name - itemString // Quoted string constant - itemNumber // Number constant - itemAxe // Axe (like child::) - itemEOF // END -) - -// A node is an XPath node in the parse tree. -type node interface { - Type() nodeType -} - -// nodeType identifies the type of a parse tree node. -type nodeType int - -func (t nodeType) Type() nodeType { - return t -} - -const ( - nodeRoot nodeType = iota - nodeAxis - nodeFilter - nodeFunction - nodeOperator - nodeVariable - nodeConstantOperand -) - -type parser struct { - r *scanner - d int -} - -// newOperatorNode returns new operator node OperatorNode. -func newOperatorNode(op string, left, right node) node { - return &operatorNode{nodeType: nodeOperator, Op: op, Left: left, Right: right} -} - -// newOperand returns new constant operand node OperandNode. -func newOperandNode(v interface{}) node { - return &operandNode{nodeType: nodeConstantOperand, Val: v} -} - -// newAxisNode returns new axis node AxisNode. -func newAxisNode(axeTyp, localName, prefix, prop string, n node) node { - return &axisNode{ - nodeType: nodeAxis, - LocalName: localName, - Prefix: prefix, - AxeType: axeTyp, - Prop: prop, - Input: n, - } -} - -// newVariableNode returns new variable node VariableNode. -func newVariableNode(prefix, name string) node { - return &variableNode{nodeType: nodeVariable, Name: name, Prefix: prefix} -} - -// newFilterNode returns a new filter node FilterNode. -func newFilterNode(n, m node) node { - return &filterNode{nodeType: nodeFilter, Input: n, Condition: m} -} - -// newRootNode returns a root node. -func newRootNode(s string) node { - return &rootNode{nodeType: nodeRoot, slash: s} -} - -// newFunctionNode returns function call node. -func newFunctionNode(name, prefix string, args []node) node { - return &functionNode{nodeType: nodeFunction, Prefix: prefix, FuncName: name, Args: args} -} - -// testOp reports whether current item name is an operand op. -func testOp(r *scanner, op string) bool { - return r.typ == itemName && r.prefix == "" && r.name == op -} - -func isPrimaryExpr(r *scanner) bool { - switch r.typ { - case itemString, itemNumber, itemDollar, itemLParens: - return true - case itemName: - return r.canBeFunc && !isNodeType(r) - } - return false -} - -func isNodeType(r *scanner) bool { - switch r.name { - case "node", "text", "processing-instruction", "comment": - return r.prefix == "" - } - return false -} - -func isStep(item itemType) bool { - switch item { - case itemDot, itemDotDot, itemAt, itemAxe, itemStar, itemName: - return true - } - return false -} - -func checkItem(r *scanner, typ itemType) { - if r.typ != typ { - panic(fmt.Sprintf("%s has an invalid token", r.text)) - } -} - -// parseExpression parsing the expression with input node n. -func (p *parser) parseExpression(n node) node { - if p.d = p.d + 1; p.d > 200 { - panic("the xpath query is too complex(depth > 200)") - } - n = p.parseOrExpr(n) - p.d-- - return n -} - -// next scanning next item on forward. -func (p *parser) next() bool { - return p.r.nextItem() -} - -func (p *parser) skipItem(typ itemType) { - checkItem(p.r, typ) - p.next() -} - -// OrExpr ::= AndExpr | OrExpr 'or' AndExpr -func (p *parser) parseOrExpr(n node) node { - opnd := p.parseAndExpr(n) - for { - if !testOp(p.r, "or") { - break - } - p.next() - opnd = newOperatorNode("or", opnd, p.parseAndExpr(n)) - } - return opnd -} - -// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr -func (p *parser) parseAndExpr(n node) node { - opnd := p.parseEqualityExpr(n) - for { - if !testOp(p.r, "and") { - break - } - p.next() - opnd = newOperatorNode("and", opnd, p.parseEqualityExpr(n)) - } - return opnd -} - -// EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr -func (p *parser) parseEqualityExpr(n node) node { - opnd := p.parseRelationalExpr(n) -Loop: - for { - var op string - switch p.r.typ { - case itemEq: - op = "=" - case itemNe: - op = "!=" - default: - break Loop - } - p.next() - opnd = newOperatorNode(op, opnd, p.parseRelationalExpr(n)) - } - return opnd -} - -// RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr -// | RelationalExpr '<=' AdditiveExpr -// | RelationalExpr '>=' AdditiveExpr -func (p *parser) parseRelationalExpr(n node) node { - opnd := p.parseAdditiveExpr(n) -Loop: - for { - var op string - switch p.r.typ { - case itemLt: - op = "<" - case itemGt: - op = ">" - case itemLe: - op = "<=" - case itemGe: - op = ">=" - default: - break Loop - } - p.next() - opnd = newOperatorNode(op, opnd, p.parseAdditiveExpr(n)) - } - return opnd -} - -// AdditiveExpr ::= MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr -func (p *parser) parseAdditiveExpr(n node) node { - opnd := p.parseMultiplicativeExpr(n) -Loop: - for { - var op string - switch p.r.typ { - case itemPlus: - op = "+" - case itemMinus: - op = "-" - default: - break Loop - } - p.next() - opnd = newOperatorNode(op, opnd, p.parseMultiplicativeExpr(n)) - } - return opnd -} - -// MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator(*) UnaryExpr -// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr -func (p *parser) parseMultiplicativeExpr(n node) node { - opnd := p.parseUnaryExpr(n) -Loop: - for { - var op string - if p.r.typ == itemStar { - op = "*" - } else if testOp(p.r, "div") || testOp(p.r, "mod") { - op = p.r.name - } else { - break Loop - } - p.next() - opnd = newOperatorNode(op, opnd, p.parseUnaryExpr(n)) - } - return opnd -} - -// UnaryExpr ::= UnionExpr | '-' UnaryExpr -func (p *parser) parseUnaryExpr(n node) node { - minus := false - // ignore '-' sequence - for p.r.typ == itemMinus { - p.next() - minus = !minus - } - opnd := p.parseUnionExpr(n) - if minus { - opnd = newOperatorNode("*", opnd, newOperandNode(float64(-1))) - } - return opnd -} - -// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr -func (p *parser) parseUnionExpr(n node) node { - opnd := p.parsePathExpr(n) -Loop: - for { - if p.r.typ != itemUnion { - break Loop - } - p.next() - opnd2 := p.parsePathExpr(n) - // Checking the node type that must be is node set type? - opnd = newOperatorNode("|", opnd, opnd2) - } - return opnd -} - -// PathExpr ::= LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath | FilterExpr '//' RelativeLocationPath -func (p *parser) parsePathExpr(n node) node { - var opnd node - if isPrimaryExpr(p.r) { - opnd = p.parseFilterExpr(n) - switch p.r.typ { - case itemSlash: - p.next() - opnd = p.parseRelativeLocationPath(opnd) - case itemSlashSlash: - p.next() - opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) - } - } else { - opnd = p.parseLocationPath(nil) - } - return opnd -} - -// FilterExpr ::= PrimaryExpr | FilterExpr Predicate -func (p *parser) parseFilterExpr(n node) node { - opnd := p.parsePrimaryExpr(n) - if p.r.typ == itemLBracket { - opnd = newFilterNode(opnd, p.parsePredicate(opnd)) - } - return opnd -} - -// Predicate ::= '[' PredicateExpr ']' -func (p *parser) parsePredicate(n node) node { - p.skipItem(itemLBracket) - opnd := p.parseExpression(n) - p.skipItem(itemRBracket) - return opnd -} - -// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath -func (p *parser) parseLocationPath(n node) (opnd node) { - switch p.r.typ { - case itemSlash: - p.next() - opnd = newRootNode("/") - if isStep(p.r.typ) { - opnd = p.parseRelativeLocationPath(opnd) // ?? child:: or self ?? - } - case itemSlashSlash: - p.next() - opnd = newRootNode("//") - opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) - default: - opnd = p.parseRelativeLocationPath(n) - } - return opnd -} - -// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | AbbreviatedRelativeLocationPath -func (p *parser) parseRelativeLocationPath(n node) node { - opnd := n -Loop: - for { - opnd = p.parseStep(opnd) - switch p.r.typ { - case itemSlashSlash: - p.next() - opnd = newAxisNode("descendant-or-self", "", "", "", opnd) - case itemSlash: - p.next() - default: - break Loop - } - } - return opnd -} - -// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep -func (p *parser) parseStep(n node) (opnd node) { - axeTyp := "child" // default axes value. - if p.r.typ == itemDot || p.r.typ == itemDotDot { - if p.r.typ == itemDot { - axeTyp = "self" - } else { - axeTyp = "parent" - } - p.next() - opnd = newAxisNode(axeTyp, "", "", "", n) - if p.r.typ != itemLBracket { - return opnd - } - } else { - switch p.r.typ { - case itemAt: - p.next() - axeTyp = "attribute" - case itemAxe: - axeTyp = p.r.name - p.next() - case itemLParens: - return p.parseSequence(n) - } - opnd = p.parseNodeTest(n, axeTyp) - } - for p.r.typ == itemLBracket { - opnd = newFilterNode(opnd, p.parsePredicate(opnd)) - } - return opnd -} - -// Expr ::= '(' Step ("," Step)* ')' -func (p *parser) parseSequence(n node) (opnd node) { - p.skipItem(itemLParens) - opnd = p.parseStep(n) - for { - if p.r.typ != itemComma { - break - } - p.next() - opnd2 := p.parseStep(n) - opnd = newOperatorNode("|", opnd, opnd2) - } - p.skipItem(itemRParens) - return opnd -} - -// NodeTest ::= NameTest | nodeType '(' ')' | 'processing-instruction' '(' Literal ')' -func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { - switch p.r.typ { - case itemName: - if p.r.canBeFunc && isNodeType(p.r) { - var prop string - switch p.r.name { - case "comment", "text", "processing-instruction", "node": - prop = p.r.name - } - var name string - p.next() - p.skipItem(itemLParens) - if prop == "processing-instruction" && p.r.typ != itemRParens { - checkItem(p.r, itemString) - name = p.r.strval - p.next() - } - p.skipItem(itemRParens) - opnd = newAxisNode(axeTyp, name, "", prop, n) - } else { - prefix := p.r.prefix - name := p.r.name - p.next() - if p.r.name == "*" { - name = "" - } - opnd = newAxisNode(axeTyp, name, prefix, "", n) - } - case itemStar: - opnd = newAxisNode(axeTyp, "", "", "", n) - p.next() - default: - panic("expression must evaluate to a node-set") - } - return opnd -} - -// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall -func (p *parser) parsePrimaryExpr(n node) (opnd node) { - switch p.r.typ { - case itemString: - opnd = newOperandNode(p.r.strval) - p.next() - case itemNumber: - opnd = newOperandNode(p.r.numval) - p.next() - case itemDollar: - p.next() - checkItem(p.r, itemName) - opnd = newVariableNode(p.r.prefix, p.r.name) - p.next() - case itemLParens: - p.next() - opnd = p.parseExpression(n) - p.skipItem(itemRParens) - case itemName: - if p.r.canBeFunc && !isNodeType(p.r) { - opnd = p.parseMethod(nil) - } - } - return opnd -} - -// FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')' -func (p *parser) parseMethod(n node) node { - var args []node - name := p.r.name - prefix := p.r.prefix - - p.skipItem(itemName) - p.skipItem(itemLParens) - if p.r.typ != itemRParens { - for { - args = append(args, p.parseExpression(n)) - if p.r.typ == itemRParens { - break - } - p.skipItem(itemComma) - } - } - p.skipItem(itemRParens) - return newFunctionNode(name, prefix, args) -} - -// Parse parsing the XPath express string expr and returns a tree node. -func parse(expr string) node { - r := &scanner{text: expr} - r.nextChar() - r.nextItem() - p := &parser{r: r} - return p.parseExpression(nil) -} - -// rootNode holds a top-level node of tree. -type rootNode struct { - nodeType - slash string -} - -func (r *rootNode) String() string { - return r.slash -} - -// operatorNode holds two Nodes operator. -type operatorNode struct { - nodeType - Op string - Left, Right node -} - -func (o *operatorNode) String() string { - return fmt.Sprintf("%v%s%v", o.Left, o.Op, o.Right) -} - -// axisNode holds a location step. -type axisNode struct { - nodeType - Input node - Prop string // node-test name.[comment|text|processing-instruction|node] - AxeType string // name of the axes.[attribute|ancestor|child|....] - LocalName string // local part name of node. - Prefix string // prefix name of node. -} - -func (a *axisNode) String() string { - var b bytes.Buffer - if a.AxeType != "" { - b.Write([]byte(a.AxeType + "::")) - } - if a.Prefix != "" { - b.Write([]byte(a.Prefix + ":")) - } - b.Write([]byte(a.LocalName)) - if a.Prop != "" { - b.Write([]byte("/" + a.Prop + "()")) - } - return b.String() -} - -// operandNode holds a constant operand. -type operandNode struct { - nodeType - Val interface{} -} - -func (o *operandNode) String() string { - return fmt.Sprintf("%v", o.Val) -} - -// filterNode holds a condition filter. -type filterNode struct { - nodeType - Input, Condition node -} - -func (f *filterNode) String() string { - return fmt.Sprintf("%s[%s]", f.Input, f.Condition) -} - -// variableNode holds a variable. -type variableNode struct { - nodeType - Name, Prefix string -} - -func (v *variableNode) String() string { - if v.Prefix == "" { - return v.Name - } - return fmt.Sprintf("%s:%s", v.Prefix, v.Name) -} - -// functionNode holds a function call. -type functionNode struct { - nodeType - Args []node - Prefix string - FuncName string // function name -} - -func (f *functionNode) String() string { - var b bytes.Buffer - // fun(arg1, ..., argn) - b.Write([]byte(f.FuncName)) - b.Write([]byte("(")) - for i, arg := range f.Args { - if i > 0 { - b.Write([]byte(",")) - } - b.Write([]byte(fmt.Sprintf("%s", arg))) - } - b.Write([]byte(")")) - return b.String() -} - -type scanner struct { - text, name, prefix string - - pos int - curr rune - typ itemType - strval string // text value at current pos - numval float64 // number value at current pos - canBeFunc bool -} - -func (s *scanner) nextChar() bool { - if s.pos >= len(s.text) { - s.curr = rune(0) - return false - } - s.curr = rune(s.text[s.pos]) - s.pos++ - return true -} - -func (s *scanner) nextItem() bool { - s.skipSpace() - switch s.curr { - case 0: - s.typ = itemEOF - return false - case ',', '@', '(', ')', '|', '*', '[', ']', '+', '-', '=', '#', '$': - s.typ = asItemType(s.curr) - s.nextChar() - case '<': - s.typ = itemLt - s.nextChar() - if s.curr == '=' { - s.typ = itemLe - s.nextChar() - } - case '>': - s.typ = itemGt - s.nextChar() - if s.curr == '=' { - s.typ = itemGe - s.nextChar() - } - case '!': - s.typ = itemBang - s.nextChar() - if s.curr == '=' { - s.typ = itemNe - s.nextChar() - } - case '.': - s.typ = itemDot - s.nextChar() - if s.curr == '.' { - s.typ = itemDotDot - s.nextChar() - } else if isDigit(s.curr) { - s.typ = itemNumber - s.numval = s.scanFraction() - } - case '/': - s.typ = itemSlash - s.nextChar() - if s.curr == '/' { - s.typ = itemSlashSlash - s.nextChar() - } - case '"', '\'': - s.typ = itemString - s.strval = s.scanString() - default: - if isDigit(s.curr) { - s.typ = itemNumber - s.numval = s.scanNumber() - } else if isName(s.curr) { - s.typ = itemName - s.name = s.scanName() - s.prefix = "" - // "foo:bar" is one itemem not three because it doesn't allow spaces in between - // We should distinct it from "foo::" and need process "foo ::" as well - if s.curr == ':' { - s.nextChar() - // can be "foo:bar" or "foo::" - if s.curr == ':' { - // "foo::" - s.nextChar() - s.typ = itemAxe - } else { // "foo:*", "foo:bar" or "foo: " - s.prefix = s.name - if s.curr == '*' { - s.nextChar() - s.name = "*" - } else if isName(s.curr) { - s.name = s.scanName() - } else { - panic(fmt.Sprintf("%s has an invalid qualified name.", s.text)) - } - } - } else { - s.skipSpace() - if s.curr == ':' { - s.nextChar() - // it can be "foo ::" or just "foo :" - if s.curr == ':' { - s.nextChar() - s.typ = itemAxe - } else { - panic(fmt.Sprintf("%s has an invalid qualified name.", s.text)) - } - } - } - s.skipSpace() - s.canBeFunc = s.curr == '(' - } else { - panic(fmt.Sprintf("%s has an invalid token.", s.text)) - } - } - return true -} - -func (s *scanner) skipSpace() { -Loop: - for { - if !unicode.IsSpace(s.curr) || !s.nextChar() { - break Loop - } - } -} - -func (s *scanner) scanFraction() float64 { - var ( - i = s.pos - 2 - c = 1 // '.' - ) - for isDigit(s.curr) { - s.nextChar() - c++ - } - v, err := strconv.ParseFloat(s.text[i:i+c], 64) - if err != nil { - panic(fmt.Errorf("xpath: scanFraction parse float got error: %v", err)) - } - return v -} - -func (s *scanner) scanNumber() float64 { - var ( - c int - i = s.pos - 1 - ) - for isDigit(s.curr) { - s.nextChar() - c++ - } - if s.curr == '.' { - s.nextChar() - c++ - for isDigit(s.curr) { - s.nextChar() - c++ - } - } - v, err := strconv.ParseFloat(s.text[i:i+c], 64) - if err != nil { - panic(fmt.Errorf("xpath: scanNumber parse float got error: %v", err)) - } - return v -} - -func (s *scanner) scanString() string { - var ( - c = 0 - end = s.curr - ) - s.nextChar() - i := s.pos - 1 - for s.curr != end { - if !s.nextChar() { - panic(errors.New("xpath: scanString got unclosed string")) - } - c++ - } - s.nextChar() - return s.text[i : i+c] -} - -func (s *scanner) scanName() string { - var ( - c int - i = s.pos - 1 - ) - for isName(s.curr) { - c++ - if !s.nextChar() { - break - } - } - return s.text[i : i+c] -} - -func isName(r rune) bool { - return string(r) != ":" && string(r) != "/" && - (unicode.Is(first, r) || unicode.Is(second, r) || string(r) == "*") -} - -func isDigit(r rune) bool { - return unicode.IsDigit(r) -} - -func asItemType(r rune) itemType { - switch r { - case ',': - return itemComma - case '@': - return itemAt - case '(': - return itemLParens - case ')': - return itemRParens - case '|': - return itemUnion - case '*': - return itemStar - case '[': - return itemLBracket - case ']': - return itemRBracket - case '+': - return itemPlus - case '-': - return itemMinus - case '=': - return itemEq - case '$': - return itemDollar - } - panic(fmt.Errorf("unknown item: %v", r)) -} - -var first = &unicode.RangeTable{ - R16: []unicode.Range16{ - {0x003A, 0x003A, 1}, - {0x0041, 0x005A, 1}, - {0x005F, 0x005F, 1}, - {0x0061, 0x007A, 1}, - {0x00C0, 0x00D6, 1}, - {0x00D8, 0x00F6, 1}, - {0x00F8, 0x00FF, 1}, - {0x0100, 0x0131, 1}, - {0x0134, 0x013E, 1}, - {0x0141, 0x0148, 1}, - {0x014A, 0x017E, 1}, - {0x0180, 0x01C3, 1}, - {0x01CD, 0x01F0, 1}, - {0x01F4, 0x01F5, 1}, - {0x01FA, 0x0217, 1}, - {0x0250, 0x02A8, 1}, - {0x02BB, 0x02C1, 1}, - {0x0386, 0x0386, 1}, - {0x0388, 0x038A, 1}, - {0x038C, 0x038C, 1}, - {0x038E, 0x03A1, 1}, - {0x03A3, 0x03CE, 1}, - {0x03D0, 0x03D6, 1}, - {0x03DA, 0x03E0, 2}, - {0x03E2, 0x03F3, 1}, - {0x0401, 0x040C, 1}, - {0x040E, 0x044F, 1}, - {0x0451, 0x045C, 1}, - {0x045E, 0x0481, 1}, - {0x0490, 0x04C4, 1}, - {0x04C7, 0x04C8, 1}, - {0x04CB, 0x04CC, 1}, - {0x04D0, 0x04EB, 1}, - {0x04EE, 0x04F5, 1}, - {0x04F8, 0x04F9, 1}, - {0x0531, 0x0556, 1}, - {0x0559, 0x0559, 1}, - {0x0561, 0x0586, 1}, - {0x05D0, 0x05EA, 1}, - {0x05F0, 0x05F2, 1}, - {0x0621, 0x063A, 1}, - {0x0641, 0x064A, 1}, - {0x0671, 0x06B7, 1}, - {0x06BA, 0x06BE, 1}, - {0x06C0, 0x06CE, 1}, - {0x06D0, 0x06D3, 1}, - {0x06D5, 0x06D5, 1}, - {0x06E5, 0x06E6, 1}, - {0x0905, 0x0939, 1}, - {0x093D, 0x093D, 1}, - {0x0958, 0x0961, 1}, - {0x0985, 0x098C, 1}, - {0x098F, 0x0990, 1}, - {0x0993, 0x09A8, 1}, - {0x09AA, 0x09B0, 1}, - {0x09B2, 0x09B2, 1}, - {0x09B6, 0x09B9, 1}, - {0x09DC, 0x09DD, 1}, - {0x09DF, 0x09E1, 1}, - {0x09F0, 0x09F1, 1}, - {0x0A05, 0x0A0A, 1}, - {0x0A0F, 0x0A10, 1}, - {0x0A13, 0x0A28, 1}, - {0x0A2A, 0x0A30, 1}, - {0x0A32, 0x0A33, 1}, - {0x0A35, 0x0A36, 1}, - {0x0A38, 0x0A39, 1}, - {0x0A59, 0x0A5C, 1}, - {0x0A5E, 0x0A5E, 1}, - {0x0A72, 0x0A74, 1}, - {0x0A85, 0x0A8B, 1}, - {0x0A8D, 0x0A8D, 1}, - {0x0A8F, 0x0A91, 1}, - {0x0A93, 0x0AA8, 1}, - {0x0AAA, 0x0AB0, 1}, - {0x0AB2, 0x0AB3, 1}, - {0x0AB5, 0x0AB9, 1}, - {0x0ABD, 0x0AE0, 0x23}, - {0x0B05, 0x0B0C, 1}, - {0x0B0F, 0x0B10, 1}, - {0x0B13, 0x0B28, 1}, - {0x0B2A, 0x0B30, 1}, - {0x0B32, 0x0B33, 1}, - {0x0B36, 0x0B39, 1}, - {0x0B3D, 0x0B3D, 1}, - {0x0B5C, 0x0B5D, 1}, - {0x0B5F, 0x0B61, 1}, - {0x0B85, 0x0B8A, 1}, - {0x0B8E, 0x0B90, 1}, - {0x0B92, 0x0B95, 1}, - {0x0B99, 0x0B9A, 1}, - {0x0B9C, 0x0B9C, 1}, - {0x0B9E, 0x0B9F, 1}, - {0x0BA3, 0x0BA4, 1}, - {0x0BA8, 0x0BAA, 1}, - {0x0BAE, 0x0BB5, 1}, - {0x0BB7, 0x0BB9, 1}, - {0x0C05, 0x0C0C, 1}, - {0x0C0E, 0x0C10, 1}, - {0x0C12, 0x0C28, 1}, - {0x0C2A, 0x0C33, 1}, - {0x0C35, 0x0C39, 1}, - {0x0C60, 0x0C61, 1}, - {0x0C85, 0x0C8C, 1}, - {0x0C8E, 0x0C90, 1}, - {0x0C92, 0x0CA8, 1}, - {0x0CAA, 0x0CB3, 1}, - {0x0CB5, 0x0CB9, 1}, - {0x0CDE, 0x0CDE, 1}, - {0x0CE0, 0x0CE1, 1}, - {0x0D05, 0x0D0C, 1}, - {0x0D0E, 0x0D10, 1}, - {0x0D12, 0x0D28, 1}, - {0x0D2A, 0x0D39, 1}, - {0x0D60, 0x0D61, 1}, - {0x0E01, 0x0E2E, 1}, - {0x0E30, 0x0E30, 1}, - {0x0E32, 0x0E33, 1}, - {0x0E40, 0x0E45, 1}, - {0x0E81, 0x0E82, 1}, - {0x0E84, 0x0E84, 1}, - {0x0E87, 0x0E88, 1}, - {0x0E8A, 0x0E8D, 3}, - {0x0E94, 0x0E97, 1}, - {0x0E99, 0x0E9F, 1}, - {0x0EA1, 0x0EA3, 1}, - {0x0EA5, 0x0EA7, 2}, - {0x0EAA, 0x0EAB, 1}, - {0x0EAD, 0x0EAE, 1}, - {0x0EB0, 0x0EB0, 1}, - {0x0EB2, 0x0EB3, 1}, - {0x0EBD, 0x0EBD, 1}, - {0x0EC0, 0x0EC4, 1}, - {0x0F40, 0x0F47, 1}, - {0x0F49, 0x0F69, 1}, - {0x10A0, 0x10C5, 1}, - {0x10D0, 0x10F6, 1}, - {0x1100, 0x1100, 1}, - {0x1102, 0x1103, 1}, - {0x1105, 0x1107, 1}, - {0x1109, 0x1109, 1}, - {0x110B, 0x110C, 1}, - {0x110E, 0x1112, 1}, - {0x113C, 0x1140, 2}, - {0x114C, 0x1150, 2}, - {0x1154, 0x1155, 1}, - {0x1159, 0x1159, 1}, - {0x115F, 0x1161, 1}, - {0x1163, 0x1169, 2}, - {0x116D, 0x116E, 1}, - {0x1172, 0x1173, 1}, - {0x1175, 0x119E, 0x119E - 0x1175}, - {0x11A8, 0x11AB, 0x11AB - 0x11A8}, - {0x11AE, 0x11AF, 1}, - {0x11B7, 0x11B8, 1}, - {0x11BA, 0x11BA, 1}, - {0x11BC, 0x11C2, 1}, - {0x11EB, 0x11F0, 0x11F0 - 0x11EB}, - {0x11F9, 0x11F9, 1}, - {0x1E00, 0x1E9B, 1}, - {0x1EA0, 0x1EF9, 1}, - {0x1F00, 0x1F15, 1}, - {0x1F18, 0x1F1D, 1}, - {0x1F20, 0x1F45, 1}, - {0x1F48, 0x1F4D, 1}, - {0x1F50, 0x1F57, 1}, - {0x1F59, 0x1F5B, 0x1F5B - 0x1F59}, - {0x1F5D, 0x1F5D, 1}, - {0x1F5F, 0x1F7D, 1}, - {0x1F80, 0x1FB4, 1}, - {0x1FB6, 0x1FBC, 1}, - {0x1FBE, 0x1FBE, 1}, - {0x1FC2, 0x1FC4, 1}, - {0x1FC6, 0x1FCC, 1}, - {0x1FD0, 0x1FD3, 1}, - {0x1FD6, 0x1FDB, 1}, - {0x1FE0, 0x1FEC, 1}, - {0x1FF2, 0x1FF4, 1}, - {0x1FF6, 0x1FFC, 1}, - {0x2126, 0x2126, 1}, - {0x212A, 0x212B, 1}, - {0x212E, 0x212E, 1}, - {0x2180, 0x2182, 1}, - {0x3007, 0x3007, 1}, - {0x3021, 0x3029, 1}, - {0x3041, 0x3094, 1}, - {0x30A1, 0x30FA, 1}, - {0x3105, 0x312C, 1}, - {0x4E00, 0x9FA5, 1}, - {0xAC00, 0xD7A3, 1}, - }, -} - -var second = &unicode.RangeTable{ - R16: []unicode.Range16{ - {0x002D, 0x002E, 1}, - {0x0030, 0x0039, 1}, - {0x00B7, 0x00B7, 1}, - {0x02D0, 0x02D1, 1}, - {0x0300, 0x0345, 1}, - {0x0360, 0x0361, 1}, - {0x0387, 0x0387, 1}, - {0x0483, 0x0486, 1}, - {0x0591, 0x05A1, 1}, - {0x05A3, 0x05B9, 1}, - {0x05BB, 0x05BD, 1}, - {0x05BF, 0x05BF, 1}, - {0x05C1, 0x05C2, 1}, - {0x05C4, 0x0640, 0x0640 - 0x05C4}, - {0x064B, 0x0652, 1}, - {0x0660, 0x0669, 1}, - {0x0670, 0x0670, 1}, - {0x06D6, 0x06DC, 1}, - {0x06DD, 0x06DF, 1}, - {0x06E0, 0x06E4, 1}, - {0x06E7, 0x06E8, 1}, - {0x06EA, 0x06ED, 1}, - {0x06F0, 0x06F9, 1}, - {0x0901, 0x0903, 1}, - {0x093C, 0x093C, 1}, - {0x093E, 0x094C, 1}, - {0x094D, 0x094D, 1}, - {0x0951, 0x0954, 1}, - {0x0962, 0x0963, 1}, - {0x0966, 0x096F, 1}, - {0x0981, 0x0983, 1}, - {0x09BC, 0x09BC, 1}, - {0x09BE, 0x09BF, 1}, - {0x09C0, 0x09C4, 1}, - {0x09C7, 0x09C8, 1}, - {0x09CB, 0x09CD, 1}, - {0x09D7, 0x09D7, 1}, - {0x09E2, 0x09E3, 1}, - {0x09E6, 0x09EF, 1}, - {0x0A02, 0x0A3C, 0x3A}, - {0x0A3E, 0x0A3F, 1}, - {0x0A40, 0x0A42, 1}, - {0x0A47, 0x0A48, 1}, - {0x0A4B, 0x0A4D, 1}, - {0x0A66, 0x0A6F, 1}, - {0x0A70, 0x0A71, 1}, - {0x0A81, 0x0A83, 1}, - {0x0ABC, 0x0ABC, 1}, - {0x0ABE, 0x0AC5, 1}, - {0x0AC7, 0x0AC9, 1}, - {0x0ACB, 0x0ACD, 1}, - {0x0AE6, 0x0AEF, 1}, - {0x0B01, 0x0B03, 1}, - {0x0B3C, 0x0B3C, 1}, - {0x0B3E, 0x0B43, 1}, - {0x0B47, 0x0B48, 1}, - {0x0B4B, 0x0B4D, 1}, - {0x0B56, 0x0B57, 1}, - {0x0B66, 0x0B6F, 1}, - {0x0B82, 0x0B83, 1}, - {0x0BBE, 0x0BC2, 1}, - {0x0BC6, 0x0BC8, 1}, - {0x0BCA, 0x0BCD, 1}, - {0x0BD7, 0x0BD7, 1}, - {0x0BE7, 0x0BEF, 1}, - {0x0C01, 0x0C03, 1}, - {0x0C3E, 0x0C44, 1}, - {0x0C46, 0x0C48, 1}, - {0x0C4A, 0x0C4D, 1}, - {0x0C55, 0x0C56, 1}, - {0x0C66, 0x0C6F, 1}, - {0x0C82, 0x0C83, 1}, - {0x0CBE, 0x0CC4, 1}, - {0x0CC6, 0x0CC8, 1}, - {0x0CCA, 0x0CCD, 1}, - {0x0CD5, 0x0CD6, 1}, - {0x0CE6, 0x0CEF, 1}, - {0x0D02, 0x0D03, 1}, - {0x0D3E, 0x0D43, 1}, - {0x0D46, 0x0D48, 1}, - {0x0D4A, 0x0D4D, 1}, - {0x0D57, 0x0D57, 1}, - {0x0D66, 0x0D6F, 1}, - {0x0E31, 0x0E31, 1}, - {0x0E34, 0x0E3A, 1}, - {0x0E46, 0x0E46, 1}, - {0x0E47, 0x0E4E, 1}, - {0x0E50, 0x0E59, 1}, - {0x0EB1, 0x0EB1, 1}, - {0x0EB4, 0x0EB9, 1}, - {0x0EBB, 0x0EBC, 1}, - {0x0EC6, 0x0EC6, 1}, - {0x0EC8, 0x0ECD, 1}, - {0x0ED0, 0x0ED9, 1}, - {0x0F18, 0x0F19, 1}, - {0x0F20, 0x0F29, 1}, - {0x0F35, 0x0F39, 2}, - {0x0F3E, 0x0F3F, 1}, - {0x0F71, 0x0F84, 1}, - {0x0F86, 0x0F8B, 1}, - {0x0F90, 0x0F95, 1}, - {0x0F97, 0x0F97, 1}, - {0x0F99, 0x0FAD, 1}, - {0x0FB1, 0x0FB7, 1}, - {0x0FB9, 0x0FB9, 1}, - {0x20D0, 0x20DC, 1}, - {0x20E1, 0x3005, 0x3005 - 0x20E1}, - {0x302A, 0x302F, 1}, - {0x3031, 0x3035, 1}, - {0x3099, 0x309A, 1}, - {0x309D, 0x309E, 1}, - {0x30FC, 0x30FE, 1}, - }, -} diff --git a/vendor/github.com/antchfx/xpath/query.go b/vendor/github.com/antchfx/xpath/query.go deleted file mode 100644 index 47f8076..0000000 --- a/vendor/github.com/antchfx/xpath/query.go +++ /dev/null @@ -1,923 +0,0 @@ -package xpath - -import ( - "bytes" - "fmt" - "hash/fnv" - "reflect" -) - -type iterator interface { - Current() NodeNavigator -} - -// An XPath query interface. -type query interface { - // Select traversing iterator returns a query matched node NodeNavigator. - Select(iterator) NodeNavigator - - // Evaluate evaluates query and returns values of the current query. - Evaluate(iterator) interface{} - - Clone() query -} - -// nopQuery is an empty query that always return nil for any query. -type nopQuery struct { - query -} - -func (nopQuery) Select(iterator) NodeNavigator { return nil } - -func (nopQuery) Evaluate(iterator) interface{} { return nil } - -func (nopQuery) Clone() query { return nopQuery{} } - -// contextQuery is returns current node on the iterator object query. -type contextQuery struct { - count int - Root bool // Moving to root-level node in the current context iterator. -} - -func (c *contextQuery) Select(t iterator) (n NodeNavigator) { - if c.count == 0 { - c.count++ - n = t.Current().Copy() - if c.Root { - n.MoveToRoot() - } - } - return n -} - -func (c *contextQuery) Evaluate(iterator) interface{} { - c.count = 0 - return c -} - -func (c *contextQuery) Clone() query { - return &contextQuery{count: 0, Root: c.Root} -} - -// ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*) -type ancestorQuery struct { - iterator func() NodeNavigator - - Self bool - Input query - Predicate func(NodeNavigator) bool -} - -func (a *ancestorQuery) Select(t iterator) NodeNavigator { - for { - if a.iterator == nil { - node := a.Input.Select(t) - if node == nil { - return nil - } - first := true - node = node.Copy() - a.iterator = func() NodeNavigator { - if first && a.Self { - first = false - if a.Predicate(node) { - return node - } - } - for node.MoveToParent() { - if !a.Predicate(node) { - continue - } - return node - } - return nil - } - } - - if node := a.iterator(); node != nil { - return node - } - a.iterator = nil - } -} - -func (a *ancestorQuery) Evaluate(t iterator) interface{} { - a.Input.Evaluate(t) - a.iterator = nil - return a -} - -func (a *ancestorQuery) Test(n NodeNavigator) bool { - return a.Predicate(n) -} - -func (a *ancestorQuery) Clone() query { - return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} -} - -// attributeQuery is an XPath attribute node query.(@*) -type attributeQuery struct { - iterator func() NodeNavigator - - Input query - Predicate func(NodeNavigator) bool -} - -func (a *attributeQuery) Select(t iterator) NodeNavigator { - for { - if a.iterator == nil { - node := a.Input.Select(t) - if node == nil { - return nil - } - node = node.Copy() - a.iterator = func() NodeNavigator { - for { - onAttr := node.MoveToNextAttribute() - if !onAttr { - return nil - } - if a.Predicate(node) { - return node - } - } - } - } - - if node := a.iterator(); node != nil { - return node - } - a.iterator = nil - } -} - -func (a *attributeQuery) Evaluate(t iterator) interface{} { - a.Input.Evaluate(t) - a.iterator = nil - return a -} - -func (a *attributeQuery) Test(n NodeNavigator) bool { - return a.Predicate(n) -} - -func (a *attributeQuery) Clone() query { - return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate} -} - -// childQuery is an XPath child node query.(child::*) -type childQuery struct { - posit int - iterator func() NodeNavigator - - Input query - Predicate func(NodeNavigator) bool -} - -func (c *childQuery) Select(t iterator) NodeNavigator { - for { - if c.iterator == nil { - c.posit = 0 - node := c.Input.Select(t) - if node == nil { - return nil - } - node = node.Copy() - first := true - c.iterator = func() NodeNavigator { - for { - if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) { - return nil - } - first = false - if c.Predicate(node) { - return node - } - } - } - } - - if node := c.iterator(); node != nil { - c.posit++ - return node - } - c.iterator = nil - } -} - -func (c *childQuery) Evaluate(t iterator) interface{} { - c.Input.Evaluate(t) - c.iterator = nil - return c -} - -func (c *childQuery) Test(n NodeNavigator) bool { - return c.Predicate(n) -} - -func (c *childQuery) Clone() query { - return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate} -} - -// position returns a position of current NodeNavigator. -func (c *childQuery) position() int { - return c.posit -} - -// descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*) -type descendantQuery struct { - iterator func() NodeNavigator - posit int - level int - - Self bool - Input query - Predicate func(NodeNavigator) bool -} - -func (d *descendantQuery) Select(t iterator) NodeNavigator { - for { - if d.iterator == nil { - d.posit = 0 - node := d.Input.Select(t) - if node == nil { - return nil - } - node = node.Copy() - d.level = 0 - positmap := make(map[int]int) - first := true - d.iterator = func() NodeNavigator { - if first && d.Self { - first = false - if d.Predicate(node) { - d.posit = 1 - positmap[d.level] = 1 - return node - } - } - - for { - if node.MoveToChild() { - d.level = d.level + 1 - positmap[d.level] = 0 - } else { - for { - if d.level == 0 { - return nil - } - if node.MoveToNext() { - break - } - node.MoveToParent() - d.level = d.level - 1 - } - } - if d.Predicate(node) { - positmap[d.level]++ - d.posit = positmap[d.level] - return node - } - } - } - } - - if node := d.iterator(); node != nil { - return node - } - d.iterator = nil - } -} - -func (d *descendantQuery) Evaluate(t iterator) interface{} { - d.Input.Evaluate(t) - d.iterator = nil - return d -} - -func (d *descendantQuery) Test(n NodeNavigator) bool { - return d.Predicate(n) -} - -// position returns a position of current NodeNavigator. -func (d *descendantQuery) position() int { - return d.posit -} - -func (d *descendantQuery) depth() int { - return d.level -} - -func (d *descendantQuery) Clone() query { - return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} -} - -// followingQuery is an XPath following node query.(following::*|following-sibling::*) -type followingQuery struct { - posit int - iterator func() NodeNavigator - - Input query - Sibling bool // The matching sibling node of current node. - Predicate func(NodeNavigator) bool -} - -func (f *followingQuery) Select(t iterator) NodeNavigator { - for { - if f.iterator == nil { - f.posit = 0 - node := f.Input.Select(t) - if node == nil { - return nil - } - node = node.Copy() - if f.Sibling { - f.iterator = func() NodeNavigator { - for { - if !node.MoveToNext() { - return nil - } - if f.Predicate(node) { - f.posit++ - return node - } - } - } - } else { - var q *descendantQuery // descendant query - f.iterator = func() NodeNavigator { - for { - if q == nil { - for !node.MoveToNext() { - if !node.MoveToParent() { - return nil - } - } - q = &descendantQuery{ - Self: true, - Input: &contextQuery{}, - Predicate: f.Predicate, - } - t.Current().MoveTo(node) - } - if node := q.Select(t); node != nil { - f.posit = q.posit - return node - } - q = nil - } - } - } - } - - if node := f.iterator(); node != nil { - return node - } - f.iterator = nil - } -} - -func (f *followingQuery) Evaluate(t iterator) interface{} { - f.Input.Evaluate(t) - return f -} - -func (f *followingQuery) Test(n NodeNavigator) bool { - return f.Predicate(n) -} - -func (f *followingQuery) Clone() query { - return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate} -} - -func (f *followingQuery) position() int { - return f.posit -} - -// precedingQuery is an XPath preceding node query.(preceding::*) -type precedingQuery struct { - iterator func() NodeNavigator - posit int - Input query - Sibling bool // The matching sibling node of current node. - Predicate func(NodeNavigator) bool -} - -func (p *precedingQuery) Select(t iterator) NodeNavigator { - for { - if p.iterator == nil { - p.posit = 0 - node := p.Input.Select(t) - if node == nil { - return nil - } - node = node.Copy() - if p.Sibling { - p.iterator = func() NodeNavigator { - for { - for !node.MoveToPrevious() { - return nil - } - if p.Predicate(node) { - p.posit++ - return node - } - } - } - } else { - var q query - p.iterator = func() NodeNavigator { - for { - if q == nil { - for !node.MoveToPrevious() { - if !node.MoveToParent() { - return nil - } - p.posit = 0 - } - q = &descendantQuery{ - Self: true, - Input: &contextQuery{}, - Predicate: p.Predicate, - } - t.Current().MoveTo(node) - } - if node := q.Select(t); node != nil { - p.posit++ - return node - } - q = nil - } - } - } - } - if node := p.iterator(); node != nil { - return node - } - p.iterator = nil - } -} - -func (p *precedingQuery) Evaluate(t iterator) interface{} { - p.Input.Evaluate(t) - return p -} - -func (p *precedingQuery) Test(n NodeNavigator) bool { - return p.Predicate(n) -} - -func (p *precedingQuery) Clone() query { - return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate} -} - -func (p *precedingQuery) position() int { - return p.posit -} - -// parentQuery is an XPath parent node query.(parent::*) -type parentQuery struct { - Input query - Predicate func(NodeNavigator) bool -} - -func (p *parentQuery) Select(t iterator) NodeNavigator { - for { - node := p.Input.Select(t) - if node == nil { - return nil - } - node = node.Copy() - if node.MoveToParent() && p.Predicate(node) { - return node - } - } -} - -func (p *parentQuery) Evaluate(t iterator) interface{} { - p.Input.Evaluate(t) - return p -} - -func (p *parentQuery) Clone() query { - return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate} -} - -func (p *parentQuery) Test(n NodeNavigator) bool { - return p.Predicate(n) -} - -// selfQuery is an Self node query.(self::*) -type selfQuery struct { - Input query - Predicate func(NodeNavigator) bool -} - -func (s *selfQuery) Select(t iterator) NodeNavigator { - for { - node := s.Input.Select(t) - if node == nil { - return nil - } - - if s.Predicate(node) { - return node - } - } -} - -func (s *selfQuery) Evaluate(t iterator) interface{} { - s.Input.Evaluate(t) - return s -} - -func (s *selfQuery) Test(n NodeNavigator) bool { - return s.Predicate(n) -} - -func (s *selfQuery) Clone() query { - return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate} -} - -// filterQuery is an XPath query for predicate filter. -type filterQuery struct { - Input query - Predicate query - posit int - positmap map[int]int -} - -func (f *filterQuery) do(t iterator) bool { - val := reflect.ValueOf(f.Predicate.Evaluate(t)) - switch val.Kind() { - case reflect.Bool: - return val.Bool() - case reflect.String: - return len(val.String()) > 0 - case reflect.Float64: - pt := getNodePosition(f.Input) - return int(val.Float()) == pt - default: - if q, ok := f.Predicate.(query); ok { - return q.Select(t) != nil - } - } - return false -} - -func (f *filterQuery) position() int { - return f.posit -} - -func (f *filterQuery) Select(t iterator) NodeNavigator { - if f.positmap == nil { - f.positmap = make(map[int]int) - } - for { - - node := f.Input.Select(t) - if node == nil { - return node - } - node = node.Copy() - - t.Current().MoveTo(node) - if f.do(t) { - // fix https://github.com/antchfx/htmlquery/issues/26 - // Calculate and keep the each of matching node's position in the same depth. - level := getNodeDepth(f.Input) - f.positmap[level]++ - f.posit = f.positmap[level] - return node - } - } -} - -func (f *filterQuery) Evaluate(t iterator) interface{} { - f.Input.Evaluate(t) - return f -} - -func (f *filterQuery) Clone() query { - return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()} -} - -// functionQuery is an XPath function that returns a computed value for -// the Evaluate call of the current NodeNavigator node. Select call isn't -// applicable for functionQuery. -type functionQuery struct { - Input query // Node Set - Func func(query, iterator) interface{} // The xpath function. -} - -func (f *functionQuery) Select(t iterator) NodeNavigator { - return nil -} - -// Evaluate call a specified function that will returns the -// following value type: number,string,boolean. -func (f *functionQuery) Evaluate(t iterator) interface{} { - return f.Func(f.Input, t) -} - -func (f *functionQuery) Clone() query { - return &functionQuery{Input: f.Input.Clone(), Func: f.Func} -} - -// transformFunctionQuery diffs from functionQuery where the latter computes a scalar -// value (number,string,boolean) for the current NodeNavigator node while the former -// (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator -// and returns a new NodeNavigator. It is used for non-scalar XPath functions such as -// reverse(), remove(), subsequence(), unordered(), etc. -type transformFunctionQuery struct { - Input query - Func func(query, iterator) func() NodeNavigator - iterator func() NodeNavigator -} - -func (f *transformFunctionQuery) Select(t iterator) NodeNavigator { - if f.iterator == nil { - f.iterator = f.Func(f.Input, t) - } - return f.iterator() -} - -func (f *transformFunctionQuery) Evaluate(t iterator) interface{} { - f.Input.Evaluate(t) - f.iterator = nil - return f -} - -func (f *transformFunctionQuery) Clone() query { - return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func} -} - -// constantQuery is an XPath constant operand. -type constantQuery struct { - Val interface{} -} - -func (c *constantQuery) Select(t iterator) NodeNavigator { - return nil -} - -func (c *constantQuery) Evaluate(t iterator) interface{} { - return c.Val -} - -func (c *constantQuery) Clone() query { - return c -} - -// logicalQuery is an XPath logical expression. -type logicalQuery struct { - Left, Right query - - Do func(iterator, interface{}, interface{}) interface{} -} - -func (l *logicalQuery) Select(t iterator) NodeNavigator { - // When a XPath expr is logical expression. - node := t.Current().Copy() - val := l.Evaluate(t) - switch val.(type) { - case bool: - if val.(bool) == true { - return node - } - } - return nil -} - -func (l *logicalQuery) Evaluate(t iterator) interface{} { - m := l.Left.Evaluate(t) - n := l.Right.Evaluate(t) - return l.Do(t, m, n) -} - -func (l *logicalQuery) Clone() query { - return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do} -} - -// numericQuery is an XPath numeric operator expression. -type numericQuery struct { - Left, Right query - - Do func(interface{}, interface{}) interface{} -} - -func (n *numericQuery) Select(t iterator) NodeNavigator { - return nil -} - -func (n *numericQuery) Evaluate(t iterator) interface{} { - m := n.Left.Evaluate(t) - k := n.Right.Evaluate(t) - return n.Do(m, k) -} - -func (n *numericQuery) Clone() query { - return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do} -} - -type booleanQuery struct { - IsOr bool - Left, Right query - iterator func() NodeNavigator -} - -func (b *booleanQuery) Select(t iterator) NodeNavigator { - if b.iterator == nil { - var list []NodeNavigator - i := 0 - root := t.Current().Copy() - if b.IsOr { - for { - node := b.Left.Select(t) - if node == nil { - break - } - node = node.Copy() - list = append(list, node) - } - t.Current().MoveTo(root) - for { - node := b.Right.Select(t) - if node == nil { - break - } - node = node.Copy() - list = append(list, node) - } - } else { - var m []NodeNavigator - var n []NodeNavigator - for { - node := b.Left.Select(t) - if node == nil { - break - } - node = node.Copy() - list = append(m, node) - } - t.Current().MoveTo(root) - for { - node := b.Right.Select(t) - if node == nil { - break - } - node = node.Copy() - list = append(n, node) - } - for _, k := range m { - for _, j := range n { - if k == j { - list = append(list, k) - } - } - } - } - - b.iterator = func() NodeNavigator { - if i >= len(list) { - return nil - } - node := list[i] - i++ - return node - } - } - return b.iterator() -} - -func (b *booleanQuery) Evaluate(t iterator) interface{} { - m := b.Left.Evaluate(t) - left := asBool(t, m) - if b.IsOr && left { - return true - } else if !b.IsOr && !left { - return false - } - m = b.Right.Evaluate(t) - return asBool(t, m) -} - -func (b *booleanQuery) Clone() query { - return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()} -} - -type unionQuery struct { - Left, Right query - iterator func() NodeNavigator -} - -func (u *unionQuery) Select(t iterator) NodeNavigator { - if u.iterator == nil { - var list []NodeNavigator - var m = make(map[uint64]bool) - root := t.Current().Copy() - for { - node := u.Left.Select(t) - if node == nil { - break - } - code := getHashCode(node.Copy()) - if _, ok := m[code]; !ok { - m[code] = true - list = append(list, node.Copy()) - } - } - t.Current().MoveTo(root) - for { - node := u.Right.Select(t) - if node == nil { - break - } - code := getHashCode(node.Copy()) - if _, ok := m[code]; !ok { - m[code] = true - list = append(list, node.Copy()) - } - } - var i int - u.iterator = func() NodeNavigator { - if i >= len(list) { - return nil - } - node := list[i] - i++ - return node - } - } - return u.iterator() -} - -func (u *unionQuery) Evaluate(t iterator) interface{} { - u.iterator = nil - u.Left.Evaluate(t) - u.Right.Evaluate(t) - return u -} - -func (u *unionQuery) Clone() query { - return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()} -} - -func getHashCode(n NodeNavigator) uint64 { - var sb bytes.Buffer - switch n.NodeType() { - case AttributeNode, TextNode, CommentNode: - sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value())) - // https://github.com/antchfx/htmlquery/issues/25 - d := 1 - for n.MoveToPrevious() { - d++ - } - sb.WriteString(fmt.Sprintf("-%d", d)) - for n.MoveToParent() { - d = 1 - for n.MoveToPrevious() { - d++ - } - sb.WriteString(fmt.Sprintf("-%d", d)) - } - case ElementNode: - sb.WriteString(n.Prefix() + n.LocalName()) - d := 1 - for n.MoveToPrevious() { - d++ - } - sb.WriteString(fmt.Sprintf("-%d", d)) - - for n.MoveToParent() { - d = 1 - for n.MoveToPrevious() { - d++ - } - sb.WriteString(fmt.Sprintf("-%d", d)) - } - } - h := fnv.New64a() - h.Write([]byte(sb.String())) - return h.Sum64() -} - -func getNodePosition(q query) int { - type Position interface { - position() int - } - if count, ok := q.(Position); ok { - return count.position() - } - return 1 -} - -func getNodeDepth(q query) int { - type Depth interface { - depth() int - } - if count, ok := q.(Depth); ok { - return count.depth() - } - return 0 -} diff --git a/vendor/github.com/antchfx/xpath/xpath.go b/vendor/github.com/antchfx/xpath/xpath.go deleted file mode 100644 index 5f6aa89..0000000 --- a/vendor/github.com/antchfx/xpath/xpath.go +++ /dev/null @@ -1,161 +0,0 @@ -package xpath - -import ( - "errors" - "fmt" -) - -// NodeType represents a type of XPath node. -type NodeType int - -const ( - // RootNode is a root node of the XML document or node tree. - RootNode NodeType = iota - - // ElementNode is an element, such as . - ElementNode - - // AttributeNode is an attribute, such as id='123'. - AttributeNode - - // TextNode is the text content of a node. - TextNode - - // CommentNode is a comment node, such as - CommentNode - - // allNode is any types of node, used by xpath package only to predicate match. - allNode -) - -// NodeNavigator provides cursor model for navigating XML data. -type NodeNavigator interface { - // NodeType returns the XPathNodeType of the current node. - NodeType() NodeType - - // LocalName gets the Name of the current node. - LocalName() string - - // Prefix returns namespace prefix associated with the current node. - Prefix() string - - // Value gets the value of current node. - Value() string - - // Copy does a deep copy of the NodeNavigator and all its components. - Copy() NodeNavigator - - // MoveToRoot moves the NodeNavigator to the root node of the current node. - MoveToRoot() - - // MoveToParent moves the NodeNavigator to the parent node of the current node. - MoveToParent() bool - - // MoveToNextAttribute moves the NodeNavigator to the next attribute on current node. - MoveToNextAttribute() bool - - // MoveToChild moves the NodeNavigator to the first child node of the current node. - MoveToChild() bool - - // MoveToFirst moves the NodeNavigator to the first sibling node of the current node. - MoveToFirst() bool - - // MoveToNext moves the NodeNavigator to the next sibling node of the current node. - MoveToNext() bool - - // MoveToPrevious moves the NodeNavigator to the previous sibling node of the current node. - MoveToPrevious() bool - - // MoveTo moves the NodeNavigator to the same position as the specified NodeNavigator. - MoveTo(NodeNavigator) bool -} - -// NodeIterator holds all matched Node object. -type NodeIterator struct { - node NodeNavigator - query query -} - -// Current returns current node which matched. -func (t *NodeIterator) Current() NodeNavigator { - return t.node -} - -// MoveNext moves Navigator to the next match node. -func (t *NodeIterator) MoveNext() bool { - n := t.query.Select(t) - if n != nil { - if !t.node.MoveTo(n) { - t.node = n.Copy() - } - return true - } - return false -} - -// Select selects a node set using the specified XPath expression. -// This method is deprecated, recommend using Expr.Select() method instead. -func Select(root NodeNavigator, expr string) *NodeIterator { - exp, err := Compile(expr) - if err != nil { - panic(err) - } - return exp.Select(root) -} - -// Expr is an XPath expression for query. -type Expr struct { - s string - q query -} - -type iteratorFunc func() NodeNavigator - -func (f iteratorFunc) Current() NodeNavigator { - return f() -} - -// Evaluate returns the result of the expression. -// The result type of the expression is one of the follow: bool,float64,string,NodeIterator). -func (expr *Expr) Evaluate(root NodeNavigator) interface{} { - val := expr.q.Evaluate(iteratorFunc(func() NodeNavigator { return root })) - switch val.(type) { - case query: - return &NodeIterator{query: expr.q.Clone(), node: root} - } - return val -} - -// Select selects a node set using the specified XPath expression. -func (expr *Expr) Select(root NodeNavigator) *NodeIterator { - return &NodeIterator{query: expr.q.Clone(), node: root} -} - -// String returns XPath expression string. -func (expr *Expr) String() string { - return expr.s -} - -// Compile compiles an XPath expression string. -func Compile(expr string) (*Expr, error) { - if expr == "" { - return nil, errors.New("expr expression is nil") - } - qy, err := build(expr) - if err != nil { - return nil, err - } - if qy == nil { - return nil, fmt.Errorf(fmt.Sprintf("undeclared variable in XPath expression: %s", expr)) - } - return &Expr{s: expr, q: qy}, nil -} - -// MustCompile compiles an XPath expression string and ignored error. -func MustCompile(expr string) *Expr { - exp, err := Compile(expr) - if err != nil { - return &Expr{s: expr, q: nopQuery{}} - } - return exp -} diff --git a/vendor/github.com/gocolly/colly/v2/.codecov.yml b/vendor/github.com/gocolly/colly/v2/.codecov.yml deleted file mode 100644 index 69cb760..0000000 --- a/vendor/github.com/gocolly/colly/v2/.codecov.yml +++ /dev/null @@ -1 +0,0 @@ -comment: false diff --git a/vendor/github.com/gocolly/colly/v2/.travis.yml b/vendor/github.com/gocolly/colly/v2/.travis.yml deleted file mode 100644 index b6a71f5..0000000 --- a/vendor/github.com/gocolly/colly/v2/.travis.yml +++ /dev/null @@ -1,17 +0,0 @@ -language: go -sudo: false -go: - - 1.11.x - - 1.12.x - - 1.13.x - - tip -script: - - go get -u golang.org/x/lint/golint - - OUT="$(go get -a)"; test -z "$OUT" || (echo "$OUT" && return 1) - - OUT="$(gofmt -l -d ./)"; test -z "$OUT" || (echo "$OUT" && return 1) - - OUT="$(golint ./...)"; test -z "$OUT" || (echo "$OUT" && return 1) - - go vet -v ./... - - go test -race -v -coverprofile=coverage.txt -covermode=atomic ./ - - go build -after_success: - - bash <(curl -s https://codecov.io/bash) diff --git a/vendor/github.com/gocolly/colly/v2/BUILD.bazel b/vendor/github.com/gocolly/colly/v2/BUILD.bazel deleted file mode 100644 index 11ac981..0000000 --- a/vendor/github.com/gocolly/colly/v2/BUILD.bazel +++ /dev/null @@ -1,33 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "colly", - srcs = [ - "colly.go", - "context.go", - "htmlelement.go", - "http_backend.go", - "http_trace.go", - "request.go", - "response.go", - "unmarshal.go", - "xmlelement.go", - ], - importmap = "peridot.resf.org/vendor/github.com/gocolly/colly/v2", - importpath = "github.com/gocolly/colly/v2", - visibility = ["//visibility:public"], - deps = [ - "//vendor/github.com/PuerkitoBio/goquery", - "//vendor/github.com/antchfx/htmlquery", - "//vendor/github.com/antchfx/xmlquery", - "//vendor/github.com/gobwas/glob", - "//vendor/github.com/gocolly/colly/v2/debug", - "//vendor/github.com/gocolly/colly/v2/storage", - "//vendor/github.com/kennygrant/sanitize", - "//vendor/github.com/saintfish/chardet", - "//vendor/github.com/temoto/robotstxt", - "@org_golang_google_appengine//urlfetch:go_default_library", - "@org_golang_x_net//html", - "@org_golang_x_net//html/charset", - ], -) diff --git a/vendor/github.com/gocolly/colly/v2/CHANGELOG.md b/vendor/github.com/gocolly/colly/v2/CHANGELOG.md deleted file mode 100644 index a2bba97..0000000 --- a/vendor/github.com/gocolly/colly/v2/CHANGELOG.md +++ /dev/null @@ -1,33 +0,0 @@ -# 2.0.0 - 2019.11.28 - - - Breaking change: Change Collector.RedirectHandler member to Collector.SetRedirectHandler function - - Go module support - - Collector.HasVisited method added to be able to check if an url has been visited - - Collector.SetClient method introduced - - HTMLElement.ChildTexts method added - - New user agents - - Multiple bugfixes - -# 1.2.0 - 2019.02.13 - - - Compatibility with the latest htmlquery package - - New request shortcut for HEAD requests - - Check URL availibility before visiting - - Fix proxy URL value - - Request counter fix - - Minor fixes in examples - -# 1.1.0 - 2018.08.13 - - - Appengine integration takes context.Context instead of http.Request (API change) - - Added "Accept" http header by default to every request - - Support slices of pointers in unmarshal - - Fixed a race condition in queues - - ForEachWithBreak method added to HTMLElement - - Added a local file example - - Support gzip decompression of response bodies - - Don't share waitgroup when cloning a collector - - Fixed instagram example - - -# 1.0.0 - 2018.05.13 diff --git a/vendor/github.com/gocolly/colly/v2/CONTRIBUTING.md b/vendor/github.com/gocolly/colly/v2/CONTRIBUTING.md deleted file mode 100644 index 17df636..0000000 --- a/vendor/github.com/gocolly/colly/v2/CONTRIBUTING.md +++ /dev/null @@ -1,67 +0,0 @@ -# Contribute - -## Introduction - -First, thank you for considering contributing to colly! It's people like you that make the open source community such a great community! 😊 - -We welcome any type of contribution, not only code. You can help with -- **QA**: file bug reports, the more details you can give the better (e.g. screenshots with the console open) -- **Marketing**: writing blog posts, howto's, printing stickers, ... -- **Community**: presenting the project at meetups, organizing a dedicated meetup for the local community, ... -- **Code**: take a look at the [open issues](https://github.com/gocolly/colly/issues). Even if you can't write code, commenting on them, showing that you care about a given issue matters. It helps us triage them. -- **Money**: we welcome financial contributions in full transparency on our [open collective](https://opencollective.com/colly). - -## Your First Contribution - -Working on your first Pull Request? You can learn how from this *free* series, [How to Contribute to an Open Source Project on GitHub](https://egghead.io/series/how-to-contribute-to-an-open-source-project-on-github). - -## Submitting code - -Any code change should be submitted as a pull request. The description should explain what the code does and give steps to execute it. The pull request should also contain tests. - -## Code review process - -The bigger the pull request, the longer it will take to review and merge. Try to break down large pull requests in smaller chunks that are easier to review and merge. -It is also always helpful to have some context for your pull request. What was the purpose? Why does it matter to you? - -## Financial contributions - -We also welcome financial contributions in full transparency on our [open collective](https://opencollective.com/colly). -Anyone can file an expense. If the expense makes sense for the development of the community, it will be "merged" in the ledger of our open collective by the core contributors and the person who filed the expense will be reimbursed. - -## Questions - -If you have any questions, create an [issue](https://github.com/gocolly/colly/issues/new) (protip: do a quick search first to see if someone else didn't ask the same question before!). -You can also reach us at hello@colly.opencollective.com. - -## Credits - -### Contributors - -Thank you to all the people who have already contributed to colly! - - - -### Backers - -Thank you to all our backers! [[Become a backer](https://opencollective.com/colly#backer)] - - - - -### Sponsors - -Thank you to all our sponsors! (please ask your company to also support this open source project by [becoming a sponsor](https://opencollective.com/colly#sponsor)) - - - - - - - - - - - - - diff --git a/vendor/github.com/gocolly/colly/v2/LICENSE.txt b/vendor/github.com/gocolly/colly/v2/LICENSE.txt deleted file mode 100644 index d645695..0000000 --- a/vendor/github.com/gocolly/colly/v2/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/vendor/github.com/gocolly/colly/v2/README.md b/vendor/github.com/gocolly/colly/v2/README.md deleted file mode 100644 index 91b7442..0000000 --- a/vendor/github.com/gocolly/colly/v2/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# Colly - -Lightning Fast and Elegant Scraping Framework for Gophers - -Colly provides a clean interface to write any kind of crawler/scraper/spider. - -With Colly you can easily extract structured data from websites, which can be used for a wide range of applications, like data mining, data processing or archiving. - -[![GoDoc](https://godoc.org/github.com/gocolly/colly?status.svg)](https://pkg.go.dev/github.com/gocolly/colly/v2) -[![Backers on Open Collective](https://opencollective.com/colly/backers/badge.svg)](#backers) [![Sponsors on Open Collective](https://opencollective.com/colly/sponsors/badge.svg)](#sponsors) [![build status](https://img.shields.io/travis/gocolly/colly/master.svg?style=flat-square)](https://travis-ci.org/gocolly/colly) -[![report card](https://img.shields.io/badge/report%20card-a%2B-ff3333.svg?style=flat-square)](http://goreportcard.com/report/gocolly/colly) -[![view examples](https://img.shields.io/badge/learn%20by-examples-0077b3.svg?style=flat-square)](https://github.com/gocolly/colly/tree/master/_examples) -[![Code Coverage](https://img.shields.io/codecov/c/github/gocolly/colly/master.svg)](https://codecov.io/github/gocolly/colly?branch=master) -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fgocolly%2Fcolly.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fgocolly%2Fcolly?ref=badge_shield) -[![Twitter URL](https://img.shields.io/badge/twitter-follow-green.svg)](https://twitter.com/gocolly) - -## Features - -- Clean API -- Fast (>1k request/sec on a single core) -- Manages request delays and maximum concurrency per domain -- Automatic cookie and session handling -- Sync/async/parallel scraping -- Caching -- Automatic encoding of non-unicode responses -- Robots.txt support -- Distributed scraping -- Configuration via environment variables -- Extensions - -## Example - -```go -func main() { - c := colly.NewCollector() - - // Find and visit all links - c.OnHTML("a[href]", func(e *colly.HTMLElement) { - e.Request.Visit(e.Attr("href")) - }) - - c.OnRequest(func(r *colly.Request) { - fmt.Println("Visiting", r.URL) - }) - - c.Visit("http://go-colly.org/") -} -``` - -See [examples folder](https://github.com/gocolly/colly/tree/master/_examples) for more detailed examples. - -## Installation - -Add colly to your `go.mod` file: - -``` -module github.com/x/y - -go 1.14 - -require ( - github.com/gocolly/colly/v2 latest -) -``` - -## Bugs - -Bugs or suggestions? Visit the [issue tracker](https://github.com/gocolly/colly/issues) or join `#colly` on freenode - -## Other Projects Using Colly - -Below is a list of public, open source projects that use Colly: - -- [greenpeace/check-my-pages](https://github.com/greenpeace/check-my-pages) Scraping script to test the Spanish Greenpeace web archive. -- [altsab/gowap](https://github.com/altsab/gowap) Wappalyzer implementation in Go. -- [jesuiscamille/goquotes](https://github.com/jesuiscamille/goquotes) A quotes scrapper, making your day a little better! -- [jivesearch/jivesearch](https://github.com/jivesearch/jivesearch) A search engine that doesn't track you. -- [Leagify/colly-draft-prospects](https://github.com/Leagify/colly-draft-prospects) A scraper for future NFL Draft prospects. -- [lucasepe/go-ps4](https://github.com/lucasepe/go-ps4) Search playstation store for your favorite PS4 games using the command line. -- [yringler/inside-chassidus-scraper](https://github.com/yringler/inside-chassidus-scraper) Scrapes Rabbi Paltiel's web site for lesson metadata. -- [gamedb/gamedb](https://github.com/gamedb/gamedb) A database of Steam games. -- [lawzava/scrape](https://github.com/lawzava/scrape) CLI for email scraping from any website. -- [eureka101v/WeiboSpiderGo](https://github.com/eureka101v/WeiboSpiderGo) A sina weibo(chinese twitter) scrapper -- [Go-phie/gophie](https://github.com/Go-phie/gophie) Search, Download and Stream movies from your terminal -- [imthaghost/goclone](https://github.com/imthaghost/goclone) Clone websites to your computer within seconds. - -If you are using Colly in a project please send a pull request to add it to the list. - -## Contributors - -This project exists thanks to all the people who contribute. [[Contribute]](CONTRIBUTING.md). - - -## Backers - -Thank you to all our backers! 🙏 [[Become a backer](https://opencollective.com/colly#backer)] - - - -## Sponsors - -Support this project by becoming a sponsor. Your logo will show up here with a link to your website. [[Become a sponsor](https://opencollective.com/colly#sponsor)] - - - - - - - - - - - - -## License - -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fgocolly%2Fcolly.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fgocolly%2Fcolly?ref=badge_large) diff --git a/vendor/github.com/gocolly/colly/v2/VERSION b/vendor/github.com/gocolly/colly/v2/VERSION deleted file mode 100644 index 7ec1d6d..0000000 --- a/vendor/github.com/gocolly/colly/v2/VERSION +++ /dev/null @@ -1 +0,0 @@ -2.1.0 diff --git a/vendor/github.com/gocolly/colly/v2/colly.go b/vendor/github.com/gocolly/colly/v2/colly.go deleted file mode 100644 index b5b9f4d..0000000 --- a/vendor/github.com/gocolly/colly/v2/colly.go +++ /dev/null @@ -1,1430 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package colly implements a HTTP scraping framework -package colly - -import ( - "bytes" - "context" - "crypto/rand" - "encoding/json" - "errors" - "fmt" - "hash/fnv" - "io" - "io/ioutil" - "log" - "net/http" - "net/http/cookiejar" - "net/url" - "os" - "path/filepath" - "regexp" - "strconv" - "strings" - "sync" - "sync/atomic" - "time" - - "github.com/PuerkitoBio/goquery" - "github.com/antchfx/htmlquery" - "github.com/antchfx/xmlquery" - "github.com/gocolly/colly/v2/debug" - "github.com/gocolly/colly/v2/storage" - "github.com/kennygrant/sanitize" - "github.com/temoto/robotstxt" - "google.golang.org/appengine/urlfetch" -) - -// A CollectorOption sets an option on a Collector. -type CollectorOption func(*Collector) - -// Collector provides the scraper instance for a scraping job -type Collector struct { - // UserAgent is the User-Agent string used by HTTP requests - UserAgent string - // MaxDepth limits the recursion depth of visited URLs. - // Set it to 0 for infinite recursion (default). - MaxDepth int - // AllowedDomains is a domain whitelist. - // Leave it blank to allow any domains to be visited - AllowedDomains []string - // DisallowedDomains is a domain blacklist. - DisallowedDomains []string - // DisallowedURLFilters is a list of regular expressions which restricts - // visiting URLs. If any of the rules matches to a URL the - // request will be stopped. DisallowedURLFilters will - // be evaluated before URLFilters - // Leave it blank to allow any URLs to be visited - DisallowedURLFilters []*regexp.Regexp - // URLFilters is a list of regular expressions which restricts - // visiting URLs. If any of the rules matches to a URL the - // request won't be stopped. DisallowedURLFilters will - // be evaluated before URLFilters - - // Leave it blank to allow any URLs to be visited - URLFilters []*regexp.Regexp - - // AllowURLRevisit allows multiple downloads of the same URL - AllowURLRevisit bool - // MaxBodySize is the limit of the retrieved response body in bytes. - // 0 means unlimited. - // The default value for MaxBodySize is 10MB (10 * 1024 * 1024 bytes). - MaxBodySize int - // CacheDir specifies a location where GET requests are cached as files. - // When it's not defined, caching is disabled. - CacheDir string - // IgnoreRobotsTxt allows the Collector to ignore any restrictions set by - // the target host's robots.txt file. See http://www.robotstxt.org/ for more - // information. - IgnoreRobotsTxt bool - // Async turns on asynchronous network communication. Use Collector.Wait() to - // be sure all requests have been finished. - Async bool - // ParseHTTPErrorResponse allows parsing HTTP responses with non 2xx status codes. - // By default, Colly parses only successful HTTP responses. Set ParseHTTPErrorResponse - // to true to enable it. - ParseHTTPErrorResponse bool - // ID is the unique identifier of a collector - ID uint32 - // DetectCharset can enable character encoding detection for non-utf8 response bodies - // without explicit charset declaration. This feature uses https://github.com/saintfish/chardet - DetectCharset bool - // RedirectHandler allows control on how a redirect will be managed - // use c.SetRedirectHandler to set this value - redirectHandler func(req *http.Request, via []*http.Request) error - // CheckHead performs a HEAD request before every GET to pre-validate the response - CheckHead bool - // TraceHTTP enables capturing and reporting request performance for crawler tuning. - // When set to true, the Response.Trace will be filled in with an HTTPTrace object. - TraceHTTP bool - store storage.Storage - debugger debug.Debugger - robotsMap map[string]*robotstxt.RobotsData - htmlCallbacks []*htmlCallbackContainer - xmlCallbacks []*xmlCallbackContainer - requestCallbacks []RequestCallback - responseCallbacks []ResponseCallback - responseHeadersCallbacks []ResponseHeadersCallback - errorCallbacks []ErrorCallback - scrapedCallbacks []ScrapedCallback - requestCount uint32 - responseCount uint32 - backend *httpBackend - wg *sync.WaitGroup - lock *sync.RWMutex -} - -// RequestCallback is a type alias for OnRequest callback functions -type RequestCallback func(*Request) - -// ResponseHeadersCallback is a type alias for OnResponseHeaders callback functions -type ResponseHeadersCallback func(*Response) - -// ResponseCallback is a type alias for OnResponse callback functions -type ResponseCallback func(*Response) - -// HTMLCallback is a type alias for OnHTML callback functions -type HTMLCallback func(*HTMLElement) - -// XMLCallback is a type alias for OnXML callback functions -type XMLCallback func(*XMLElement) - -// ErrorCallback is a type alias for OnError callback functions -type ErrorCallback func(*Response, error) - -// ScrapedCallback is a type alias for OnScraped callback functions -type ScrapedCallback func(*Response) - -// ProxyFunc is a type alias for proxy setter functions. -type ProxyFunc func(*http.Request) (*url.URL, error) - -type htmlCallbackContainer struct { - Selector string - Function HTMLCallback -} - -type xmlCallbackContainer struct { - Query string - Function XMLCallback -} - -type cookieJarSerializer struct { - store storage.Storage - lock *sync.RWMutex -} - -var collectorCounter uint32 - -// The key type is unexported to prevent collisions with context keys defined in -// other packages. -type key int - -// ProxyURLKey is the context key for the request proxy address. -const ProxyURLKey key = iota - -var ( - // ErrForbiddenDomain is the error thrown if visiting - // a domain which is not allowed in AllowedDomains - ErrForbiddenDomain = errors.New("Forbidden domain") - // ErrMissingURL is the error type for missing URL errors - ErrMissingURL = errors.New("Missing URL") - // ErrMaxDepth is the error type for exceeding max depth - ErrMaxDepth = errors.New("Max depth limit reached") - // ErrForbiddenURL is the error thrown if visiting - // a URL which is not allowed by URLFilters - ErrForbiddenURL = errors.New("ForbiddenURL") - - // ErrNoURLFiltersMatch is the error thrown if visiting - // a URL which is not allowed by URLFilters - ErrNoURLFiltersMatch = errors.New("No URLFilters match") - // ErrAlreadyVisited is the error type for already visited URLs - ErrAlreadyVisited = errors.New("URL already visited") - // ErrRobotsTxtBlocked is the error type for robots.txt errors - ErrRobotsTxtBlocked = errors.New("URL blocked by robots.txt") - // ErrNoCookieJar is the error type for missing cookie jar - ErrNoCookieJar = errors.New("Cookie jar is not available") - // ErrNoPattern is the error type for LimitRules without patterns - ErrNoPattern = errors.New("No pattern defined in LimitRule") - // ErrEmptyProxyURL is the error type for empty Proxy URL list - ErrEmptyProxyURL = errors.New("Proxy URL list is empty") - // ErrAbortedAfterHeaders is the error returned when OnResponseHeaders aborts the transfer. - ErrAbortedAfterHeaders = errors.New("Aborted after receiving response headers") - // ErrQueueFull is the error returned when the queue is full - ErrQueueFull = errors.New("Queue MaxSize reached") -) - -var envMap = map[string]func(*Collector, string){ - "ALLOWED_DOMAINS": func(c *Collector, val string) { - c.AllowedDomains = strings.Split(val, ",") - }, - "CACHE_DIR": func(c *Collector, val string) { - c.CacheDir = val - }, - "DETECT_CHARSET": func(c *Collector, val string) { - c.DetectCharset = isYesString(val) - }, - "DISABLE_COOKIES": func(c *Collector, _ string) { - c.backend.Client.Jar = nil - }, - "DISALLOWED_DOMAINS": func(c *Collector, val string) { - c.DisallowedDomains = strings.Split(val, ",") - }, - "IGNORE_ROBOTSTXT": func(c *Collector, val string) { - c.IgnoreRobotsTxt = isYesString(val) - }, - "FOLLOW_REDIRECTS": func(c *Collector, val string) { - if !isYesString(val) { - c.redirectHandler = func(req *http.Request, via []*http.Request) error { - return http.ErrUseLastResponse - } - } - }, - "MAX_BODY_SIZE": func(c *Collector, val string) { - size, err := strconv.Atoi(val) - if err == nil { - c.MaxBodySize = size - } - }, - "MAX_DEPTH": func(c *Collector, val string) { - maxDepth, err := strconv.Atoi(val) - if err == nil { - c.MaxDepth = maxDepth - } - }, - "PARSE_HTTP_ERROR_RESPONSE": func(c *Collector, val string) { - c.ParseHTTPErrorResponse = isYesString(val) - }, - "TRACE_HTTP": func(c *Collector, val string) { - c.TraceHTTP = isYesString(val) - }, - "USER_AGENT": func(c *Collector, val string) { - c.UserAgent = val - }, -} - -// NewCollector creates a new Collector instance with default configuration -func NewCollector(options ...CollectorOption) *Collector { - c := &Collector{} - c.Init() - - for _, f := range options { - f(c) - } - - c.parseSettingsFromEnv() - - return c -} - -// UserAgent sets the user agent used by the Collector. -func UserAgent(ua string) CollectorOption { - return func(c *Collector) { - c.UserAgent = ua - } -} - -// MaxDepth limits the recursion depth of visited URLs. -func MaxDepth(depth int) CollectorOption { - return func(c *Collector) { - c.MaxDepth = depth - } -} - -// AllowedDomains sets the domain whitelist used by the Collector. -func AllowedDomains(domains ...string) CollectorOption { - return func(c *Collector) { - c.AllowedDomains = domains - } -} - -// ParseHTTPErrorResponse allows parsing responses with HTTP errors -func ParseHTTPErrorResponse() CollectorOption { - return func(c *Collector) { - c.ParseHTTPErrorResponse = true - } -} - -// DisallowedDomains sets the domain blacklist used by the Collector. -func DisallowedDomains(domains ...string) CollectorOption { - return func(c *Collector) { - c.DisallowedDomains = domains - } -} - -// DisallowedURLFilters sets the list of regular expressions which restricts -// visiting URLs. If any of the rules matches to a URL the request will be stopped. -func DisallowedURLFilters(filters ...*regexp.Regexp) CollectorOption { - return func(c *Collector) { - c.DisallowedURLFilters = filters - } -} - -// URLFilters sets the list of regular expressions which restricts -// visiting URLs. If any of the rules matches to a URL the request won't be stopped. -func URLFilters(filters ...*regexp.Regexp) CollectorOption { - return func(c *Collector) { - c.URLFilters = filters - } -} - -// AllowURLRevisit instructs the Collector to allow multiple downloads of the same URL -func AllowURLRevisit() CollectorOption { - return func(c *Collector) { - c.AllowURLRevisit = true - } -} - -// MaxBodySize sets the limit of the retrieved response body in bytes. -func MaxBodySize(sizeInBytes int) CollectorOption { - return func(c *Collector) { - c.MaxBodySize = sizeInBytes - } -} - -// CacheDir specifies the location where GET requests are cached as files. -func CacheDir(path string) CollectorOption { - return func(c *Collector) { - c.CacheDir = path - } -} - -// IgnoreRobotsTxt instructs the Collector to ignore any restrictions -// set by the target host's robots.txt file. -func IgnoreRobotsTxt() CollectorOption { - return func(c *Collector) { - c.IgnoreRobotsTxt = true - } -} - -// TraceHTTP instructs the Collector to collect and report request trace data -// on the Response.Trace. -func TraceHTTP() CollectorOption { - return func(c *Collector) { - c.TraceHTTP = true - } -} - -// ID sets the unique identifier of the Collector. -func ID(id uint32) CollectorOption { - return func(c *Collector) { - c.ID = id - } -} - -// Async turns on asynchronous network requests. -func Async(a ...bool) CollectorOption { - return func(c *Collector) { - c.Async = true - } -} - -// DetectCharset enables character encoding detection for non-utf8 response bodies -// without explicit charset declaration. This feature uses https://github.com/saintfish/chardet -func DetectCharset() CollectorOption { - return func(c *Collector) { - c.DetectCharset = true - } -} - -// Debugger sets the debugger used by the Collector. -func Debugger(d debug.Debugger) CollectorOption { - return func(c *Collector) { - d.Init() - c.debugger = d - } -} - -// CheckHead performs a HEAD request before every GET to pre-validate the response -func CheckHead() CollectorOption { - return func(c *Collector) { - c.CheckHead = true - } -} - -// Init initializes the Collector's private variables and sets default -// configuration for the Collector -func (c *Collector) Init() { - c.UserAgent = "colly - https://github.com/gocolly/colly/v2" - c.MaxDepth = 0 - c.store = &storage.InMemoryStorage{} - c.store.Init() - c.MaxBodySize = 10 * 1024 * 1024 - c.backend = &httpBackend{} - jar, _ := cookiejar.New(nil) - c.backend.Init(jar) - c.backend.Client.CheckRedirect = c.checkRedirectFunc() - c.wg = &sync.WaitGroup{} - c.lock = &sync.RWMutex{} - c.robotsMap = make(map[string]*robotstxt.RobotsData) - c.IgnoreRobotsTxt = true - c.ID = atomic.AddUint32(&collectorCounter, 1) - c.TraceHTTP = false -} - -// Appengine will replace the Collector's backend http.Client -// With an Http.Client that is provided by appengine/urlfetch -// This function should be used when the scraper is run on -// Google App Engine. Example: -// func startScraper(w http.ResponseWriter, r *http.Request) { -// ctx := appengine.NewContext(r) -// c := colly.NewCollector() -// c.Appengine(ctx) -// ... -// c.Visit("https://google.ca") -// } -func (c *Collector) Appengine(ctx context.Context) { - client := urlfetch.Client(ctx) - client.Jar = c.backend.Client.Jar - client.CheckRedirect = c.backend.Client.CheckRedirect - client.Timeout = c.backend.Client.Timeout - - c.backend.Client = client -} - -// Visit starts Collector's collecting job by creating a -// request to the URL specified in parameter. -// Visit also calls the previously provided callbacks -func (c *Collector) Visit(URL string) error { - if c.CheckHead { - if check := c.scrape(URL, "HEAD", 1, nil, nil, nil, true); check != nil { - return check - } - } - return c.scrape(URL, "GET", 1, nil, nil, nil, true) -} - -// HasVisited checks if the provided URL has been visited -func (c *Collector) HasVisited(URL string) (bool, error) { - return c.checkHasVisited(URL, nil) -} - -// HasPosted checks if the provided URL and requestData has been visited -// This method is useful more likely to prevent re-visit same URL and POST body -func (c *Collector) HasPosted(URL string, requestData map[string]string) (bool, error) { - return c.checkHasVisited(URL, requestData) -} - -// Head starts a collector job by creating a HEAD request. -func (c *Collector) Head(URL string) error { - return c.scrape(URL, "HEAD", 1, nil, nil, nil, false) -} - -// Post starts a collector job by creating a POST request. -// Post also calls the previously provided callbacks -func (c *Collector) Post(URL string, requestData map[string]string) error { - return c.scrape(URL, "POST", 1, createFormReader(requestData), nil, nil, true) -} - -// PostRaw starts a collector job by creating a POST request with raw binary data. -// Post also calls the previously provided callbacks -func (c *Collector) PostRaw(URL string, requestData []byte) error { - return c.scrape(URL, "POST", 1, bytes.NewReader(requestData), nil, nil, true) -} - -// PostMultipart starts a collector job by creating a Multipart POST request -// with raw binary data. PostMultipart also calls the previously provided callbacks -func (c *Collector) PostMultipart(URL string, requestData map[string][]byte) error { - boundary := randomBoundary() - hdr := http.Header{} - hdr.Set("Content-Type", "multipart/form-data; boundary="+boundary) - hdr.Set("User-Agent", c.UserAgent) - return c.scrape(URL, "POST", 1, createMultipartReader(boundary, requestData), nil, hdr, true) -} - -// Request starts a collector job by creating a custom HTTP request -// where method, context, headers and request data can be specified. -// Set requestData, ctx, hdr parameters to nil if you don't want to use them. -// Valid methods: -// - "GET" -// - "HEAD" -// - "POST" -// - "PUT" -// - "DELETE" -// - "PATCH" -// - "OPTIONS" -func (c *Collector) Request(method, URL string, requestData io.Reader, ctx *Context, hdr http.Header) error { - return c.scrape(URL, method, 1, requestData, ctx, hdr, true) -} - -// SetDebugger attaches a debugger to the collector -func (c *Collector) SetDebugger(d debug.Debugger) { - d.Init() - c.debugger = d -} - -// UnmarshalRequest creates a Request from serialized data -func (c *Collector) UnmarshalRequest(r []byte) (*Request, error) { - req := &serializableRequest{} - err := json.Unmarshal(r, req) - if err != nil { - return nil, err - } - - u, err := url.Parse(req.URL) - if err != nil { - return nil, err - } - - ctx := NewContext() - for k, v := range req.Ctx { - ctx.Put(k, v) - } - - return &Request{ - Method: req.Method, - URL: u, - Depth: req.Depth, - Body: bytes.NewReader(req.Body), - Ctx: ctx, - ID: atomic.AddUint32(&c.requestCount, 1), - Headers: &req.Headers, - collector: c, - }, nil -} - -func (c *Collector) scrape(u, method string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, checkRevisit bool) error { - parsedURL, err := url.Parse(u) - if err != nil { - return err - } - if err := c.requestCheck(u, parsedURL, method, requestData, depth, checkRevisit); err != nil { - return err - } - - if hdr == nil { - hdr = http.Header{"User-Agent": []string{c.UserAgent}} - } - rc, ok := requestData.(io.ReadCloser) - if !ok && requestData != nil { - rc = ioutil.NopCloser(requestData) - } - // The Go HTTP API ignores "Host" in the headers, preferring the client - // to use the Host field on Request. - host := parsedURL.Host - if hostHeader := hdr.Get("Host"); hostHeader != "" { - host = hostHeader - } - req := &http.Request{ - Method: method, - URL: parsedURL, - Proto: "HTTP/1.1", - ProtoMajor: 1, - ProtoMinor: 1, - Header: hdr, - Body: rc, - Host: host, - } - setRequestBody(req, requestData) - u = parsedURL.String() - c.wg.Add(1) - if c.Async { - go c.fetch(u, method, depth, requestData, ctx, hdr, req) - return nil - } - return c.fetch(u, method, depth, requestData, ctx, hdr, req) -} - -func setRequestBody(req *http.Request, body io.Reader) { - if body != nil { - switch v := body.(type) { - case *bytes.Buffer: - req.ContentLength = int64(v.Len()) - buf := v.Bytes() - req.GetBody = func() (io.ReadCloser, error) { - r := bytes.NewReader(buf) - return ioutil.NopCloser(r), nil - } - case *bytes.Reader: - req.ContentLength = int64(v.Len()) - snapshot := *v - req.GetBody = func() (io.ReadCloser, error) { - r := snapshot - return ioutil.NopCloser(&r), nil - } - case *strings.Reader: - req.ContentLength = int64(v.Len()) - snapshot := *v - req.GetBody = func() (io.ReadCloser, error) { - r := snapshot - return ioutil.NopCloser(&r), nil - } - } - if req.GetBody != nil && req.ContentLength == 0 { - req.Body = http.NoBody - req.GetBody = func() (io.ReadCloser, error) { return http.NoBody, nil } - } - } -} - -func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, req *http.Request) error { - defer c.wg.Done() - if ctx == nil { - ctx = NewContext() - } - request := &Request{ - URL: req.URL, - Headers: &req.Header, - Ctx: ctx, - Depth: depth, - Method: method, - Body: requestData, - collector: c, - ID: atomic.AddUint32(&c.requestCount, 1), - } - - c.handleOnRequest(request) - - if request.abort { - return nil - } - - if method == "POST" && req.Header.Get("Content-Type") == "" { - req.Header.Add("Content-Type", "application/x-www-form-urlencoded") - } - - if req.Header.Get("Accept") == "" { - req.Header.Set("Accept", "*/*") - } - - var hTrace *HTTPTrace - if c.TraceHTTP { - hTrace = &HTTPTrace{} - req = hTrace.WithTrace(req) - } - checkHeadersFunc := func(statusCode int, headers http.Header) bool { - c.handleOnResponseHeaders(&Response{Ctx: ctx, Request: request, StatusCode: statusCode, Headers: &headers}) - return !request.abort - } - - origURL := req.URL - response, err := c.backend.Cache(req, c.MaxBodySize, checkHeadersFunc, c.CacheDir) - if proxyURL, ok := req.Context().Value(ProxyURLKey).(string); ok { - request.ProxyURL = proxyURL - } - if err := c.handleOnError(response, err, request, ctx); err != nil { - return err - } - if req.URL != origURL { - request.URL = req.URL - request.Headers = &req.Header - } - atomic.AddUint32(&c.responseCount, 1) - response.Ctx = ctx - response.Request = request - response.Trace = hTrace - - err = response.fixCharset(c.DetectCharset, request.ResponseCharacterEncoding) - if err != nil { - return err - } - - c.handleOnResponse(response) - - err = c.handleOnHTML(response) - if err != nil { - c.handleOnError(response, err, request, ctx) - } - - err = c.handleOnXML(response) - if err != nil { - c.handleOnError(response, err, request, ctx) - } - - c.handleOnScraped(response) - - return err -} - -func (c *Collector) requestCheck(u string, parsedURL *url.URL, method string, requestData io.Reader, depth int, checkRevisit bool) error { - if u == "" { - return ErrMissingURL - } - if c.MaxDepth > 0 && c.MaxDepth < depth { - return ErrMaxDepth - } - if len(c.DisallowedURLFilters) > 0 { - if isMatchingFilter(c.DisallowedURLFilters, []byte(u)) { - return ErrForbiddenURL - } - } - if len(c.URLFilters) > 0 { - if !isMatchingFilter(c.URLFilters, []byte(u)) { - return ErrNoURLFiltersMatch - } - } - if !c.isDomainAllowed(parsedURL.Hostname()) { - return ErrForbiddenDomain - } - if method != "HEAD" && !c.IgnoreRobotsTxt { - if err := c.checkRobots(parsedURL); err != nil { - return err - } - } - if checkRevisit && !c.AllowURLRevisit { - h := fnv.New64a() - h.Write([]byte(u)) - - var uHash uint64 - if method == "GET" { - uHash = h.Sum64() - } else if requestData != nil { - h.Write(streamToByte(requestData)) - uHash = h.Sum64() - } else { - return nil - } - - visited, err := c.store.IsVisited(uHash) - if err != nil { - return err - } - if visited { - return ErrAlreadyVisited - } - return c.store.Visited(uHash) - } - return nil -} - -func (c *Collector) isDomainAllowed(domain string) bool { - for _, d2 := range c.DisallowedDomains { - if d2 == domain { - return false - } - } - if c.AllowedDomains == nil || len(c.AllowedDomains) == 0 { - return true - } - for _, d2 := range c.AllowedDomains { - if d2 == domain { - return true - } - } - return false -} - -func (c *Collector) checkRobots(u *url.URL) error { - c.lock.RLock() - robot, ok := c.robotsMap[u.Host] - c.lock.RUnlock() - - if !ok { - // no robots file cached - resp, err := c.backend.Client.Get(u.Scheme + "://" + u.Host + "/robots.txt") - if err != nil { - return err - } - defer resp.Body.Close() - - robot, err = robotstxt.FromResponse(resp) - if err != nil { - return err - } - c.lock.Lock() - c.robotsMap[u.Host] = robot - c.lock.Unlock() - } - - uaGroup := robot.FindGroup(c.UserAgent) - if uaGroup == nil { - return nil - } - - eu := u.EscapedPath() - if u.RawQuery != "" { - eu += "?" + u.Query().Encode() - } - if !uaGroup.Test(eu) { - return ErrRobotsTxtBlocked - } - return nil -} - -// String is the text representation of the collector. -// It contains useful debug information about the collector's internals -func (c *Collector) String() string { - return fmt.Sprintf( - "Requests made: %d (%d responses) | Callbacks: OnRequest: %d, OnHTML: %d, OnResponse: %d, OnError: %d", - c.requestCount, - c.responseCount, - len(c.requestCallbacks), - len(c.htmlCallbacks), - len(c.responseCallbacks), - len(c.errorCallbacks), - ) -} - -// Wait returns when the collector jobs are finished -func (c *Collector) Wait() { - c.wg.Wait() -} - -// OnRequest registers a function. Function will be executed on every -// request made by the Collector -func (c *Collector) OnRequest(f RequestCallback) { - c.lock.Lock() - if c.requestCallbacks == nil { - c.requestCallbacks = make([]RequestCallback, 0, 4) - } - c.requestCallbacks = append(c.requestCallbacks, f) - c.lock.Unlock() -} - -// OnResponseHeaders registers a function. Function will be executed on every response -// when headers and status are already received, but body is not yet read. -// -// Like in OnRequest, you can call Request.Abort to abort the transfer. This might be -// useful if, for example, you're following all hyperlinks, but want to avoid -// downloading files. -// -// Be aware that using this will prevent HTTP/1.1 connection reuse, as -// the only way to abort a download is to immediately close the connection. -// HTTP/2 doesn't suffer from this problem, as it's possible to close -// specific stream inside the connection. -func (c *Collector) OnResponseHeaders(f ResponseHeadersCallback) { - c.lock.Lock() - c.responseHeadersCallbacks = append(c.responseHeadersCallbacks, f) - c.lock.Unlock() -} - -// OnResponse registers a function. Function will be executed on every response -func (c *Collector) OnResponse(f ResponseCallback) { - c.lock.Lock() - if c.responseCallbacks == nil { - c.responseCallbacks = make([]ResponseCallback, 0, 4) - } - c.responseCallbacks = append(c.responseCallbacks, f) - c.lock.Unlock() -} - -// OnHTML registers a function. Function will be executed on every HTML -// element matched by the GoQuery Selector parameter. -// GoQuery Selector is a selector used by https://github.com/PuerkitoBio/goquery -func (c *Collector) OnHTML(goquerySelector string, f HTMLCallback) { - c.lock.Lock() - if c.htmlCallbacks == nil { - c.htmlCallbacks = make([]*htmlCallbackContainer, 0, 4) - } - c.htmlCallbacks = append(c.htmlCallbacks, &htmlCallbackContainer{ - Selector: goquerySelector, - Function: f, - }) - c.lock.Unlock() -} - -// OnXML registers a function. Function will be executed on every XML -// element matched by the xpath Query parameter. -// xpath Query is used by https://github.com/antchfx/xmlquery -func (c *Collector) OnXML(xpathQuery string, f XMLCallback) { - c.lock.Lock() - if c.xmlCallbacks == nil { - c.xmlCallbacks = make([]*xmlCallbackContainer, 0, 4) - } - c.xmlCallbacks = append(c.xmlCallbacks, &xmlCallbackContainer{ - Query: xpathQuery, - Function: f, - }) - c.lock.Unlock() -} - -// OnHTMLDetach deregister a function. Function will not be execute after detached -func (c *Collector) OnHTMLDetach(goquerySelector string) { - c.lock.Lock() - deleteIdx := -1 - for i, cc := range c.htmlCallbacks { - if cc.Selector == goquerySelector { - deleteIdx = i - break - } - } - if deleteIdx != -1 { - c.htmlCallbacks = append(c.htmlCallbacks[:deleteIdx], c.htmlCallbacks[deleteIdx+1:]...) - } - c.lock.Unlock() -} - -// OnXMLDetach deregister a function. Function will not be execute after detached -func (c *Collector) OnXMLDetach(xpathQuery string) { - c.lock.Lock() - deleteIdx := -1 - for i, cc := range c.xmlCallbacks { - if cc.Query == xpathQuery { - deleteIdx = i - break - } - } - if deleteIdx != -1 { - c.xmlCallbacks = append(c.xmlCallbacks[:deleteIdx], c.xmlCallbacks[deleteIdx+1:]...) - } - c.lock.Unlock() -} - -// OnError registers a function. Function will be executed if an error -// occurs during the HTTP request. -func (c *Collector) OnError(f ErrorCallback) { - c.lock.Lock() - if c.errorCallbacks == nil { - c.errorCallbacks = make([]ErrorCallback, 0, 4) - } - c.errorCallbacks = append(c.errorCallbacks, f) - c.lock.Unlock() -} - -// OnScraped registers a function. Function will be executed after -// OnHTML, as a final part of the scraping. -func (c *Collector) OnScraped(f ScrapedCallback) { - c.lock.Lock() - if c.scrapedCallbacks == nil { - c.scrapedCallbacks = make([]ScrapedCallback, 0, 4) - } - c.scrapedCallbacks = append(c.scrapedCallbacks, f) - c.lock.Unlock() -} - -// SetClient will override the previously set http.Client -func (c *Collector) SetClient(client *http.Client) { - c.backend.Client = client -} - -// WithTransport allows you to set a custom http.RoundTripper (transport) -func (c *Collector) WithTransport(transport http.RoundTripper) { - c.backend.Client.Transport = transport -} - -// DisableCookies turns off cookie handling -func (c *Collector) DisableCookies() { - c.backend.Client.Jar = nil -} - -// SetCookieJar overrides the previously set cookie jar -func (c *Collector) SetCookieJar(j http.CookieJar) { - c.backend.Client.Jar = j -} - -// SetRequestTimeout overrides the default timeout (10 seconds) for this collector -func (c *Collector) SetRequestTimeout(timeout time.Duration) { - c.backend.Client.Timeout = timeout -} - -// SetStorage overrides the default in-memory storage. -// Storage stores scraping related data like cookies and visited urls -func (c *Collector) SetStorage(s storage.Storage) error { - if err := s.Init(); err != nil { - return err - } - c.store = s - c.backend.Client.Jar = createJar(s) - return nil -} - -// SetProxy sets a proxy for the collector. This method overrides the previously -// used http.Transport if the type of the transport is not http.RoundTripper. -// The proxy type is determined by the URL scheme. "http" -// and "socks5" are supported. If the scheme is empty, -// "http" is assumed. -func (c *Collector) SetProxy(proxyURL string) error { - proxyParsed, err := url.Parse(proxyURL) - if err != nil { - return err - } - - c.SetProxyFunc(http.ProxyURL(proxyParsed)) - - return nil -} - -// SetProxyFunc sets a custom proxy setter/switcher function. -// See built-in ProxyFuncs for more details. -// This method overrides the previously used http.Transport -// if the type of the transport is not http.RoundTripper. -// The proxy type is determined by the URL scheme. "http" -// and "socks5" are supported. If the scheme is empty, -// "http" is assumed. -func (c *Collector) SetProxyFunc(p ProxyFunc) { - t, ok := c.backend.Client.Transport.(*http.Transport) - if c.backend.Client.Transport != nil && ok { - t.Proxy = p - } else { - c.backend.Client.Transport = &http.Transport{ - Proxy: p, - } - } -} - -func createEvent(eventType string, requestID, collectorID uint32, kvargs map[string]string) *debug.Event { - return &debug.Event{ - CollectorID: collectorID, - RequestID: requestID, - Type: eventType, - Values: kvargs, - } -} - -func (c *Collector) handleOnRequest(r *Request) { - if c.debugger != nil { - c.debugger.Event(createEvent("request", r.ID, c.ID, map[string]string{ - "url": r.URL.String(), - })) - } - for _, f := range c.requestCallbacks { - f(r) - } -} - -func (c *Collector) handleOnResponse(r *Response) { - if c.debugger != nil { - c.debugger.Event(createEvent("response", r.Request.ID, c.ID, map[string]string{ - "url": r.Request.URL.String(), - "status": http.StatusText(r.StatusCode), - })) - } - for _, f := range c.responseCallbacks { - f(r) - } -} - -func (c *Collector) handleOnResponseHeaders(r *Response) { - if c.debugger != nil { - c.debugger.Event(createEvent("responseHeaders", r.Request.ID, c.ID, map[string]string{ - "url": r.Request.URL.String(), - "status": http.StatusText(r.StatusCode), - })) - } - for _, f := range c.responseHeadersCallbacks { - f(r) - } -} - -func (c *Collector) handleOnHTML(resp *Response) error { - if len(c.htmlCallbacks) == 0 || !strings.Contains(strings.ToLower(resp.Headers.Get("Content-Type")), "html") { - return nil - } - doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(resp.Body)) - if err != nil { - return err - } - if href, found := doc.Find("base[href]").Attr("href"); found { - resp.Request.baseURL, _ = resp.Request.URL.Parse(href) - } - for _, cc := range c.htmlCallbacks { - i := 0 - doc.Find(cc.Selector).Each(func(_ int, s *goquery.Selection) { - for _, n := range s.Nodes { - e := NewHTMLElementFromSelectionNode(resp, s, n, i) - i++ - if c.debugger != nil { - c.debugger.Event(createEvent("html", resp.Request.ID, c.ID, map[string]string{ - "selector": cc.Selector, - "url": resp.Request.URL.String(), - })) - } - cc.Function(e) - } - }) - } - return nil -} - -func (c *Collector) handleOnXML(resp *Response) error { - if len(c.xmlCallbacks) == 0 { - return nil - } - contentType := strings.ToLower(resp.Headers.Get("Content-Type")) - isXMLFile := strings.HasSuffix(strings.ToLower(resp.Request.URL.Path), ".xml") || strings.HasSuffix(strings.ToLower(resp.Request.URL.Path), ".xml.gz") - if !strings.Contains(contentType, "html") && (!strings.Contains(contentType, "xml") && !isXMLFile) { - return nil - } - - if strings.Contains(contentType, "html") { - doc, err := htmlquery.Parse(bytes.NewBuffer(resp.Body)) - if err != nil { - return err - } - if e := htmlquery.FindOne(doc, "//base"); e != nil { - for _, a := range e.Attr { - if a.Key == "href" { - resp.Request.baseURL, _ = resp.Request.URL.Parse(a.Val) - break - } - } - } - - for _, cc := range c.xmlCallbacks { - for _, n := range htmlquery.Find(doc, cc.Query) { - e := NewXMLElementFromHTMLNode(resp, n) - if c.debugger != nil { - c.debugger.Event(createEvent("xml", resp.Request.ID, c.ID, map[string]string{ - "selector": cc.Query, - "url": resp.Request.URL.String(), - })) - } - cc.Function(e) - } - } - } else if strings.Contains(contentType, "xml") || isXMLFile { - doc, err := xmlquery.Parse(bytes.NewBuffer(resp.Body)) - if err != nil { - return err - } - - for _, cc := range c.xmlCallbacks { - xmlquery.FindEach(doc, cc.Query, func(i int, n *xmlquery.Node) { - e := NewXMLElementFromXMLNode(resp, n) - if c.debugger != nil { - c.debugger.Event(createEvent("xml", resp.Request.ID, c.ID, map[string]string{ - "selector": cc.Query, - "url": resp.Request.URL.String(), - })) - } - cc.Function(e) - }) - } - } - return nil -} - -func (c *Collector) handleOnError(response *Response, err error, request *Request, ctx *Context) error { - if err == nil && (c.ParseHTTPErrorResponse || response.StatusCode < 203) { - return nil - } - if err == nil && response.StatusCode >= 203 { - err = errors.New(http.StatusText(response.StatusCode)) - } - if response == nil { - response = &Response{ - Request: request, - Ctx: ctx, - } - } - if c.debugger != nil { - c.debugger.Event(createEvent("error", request.ID, c.ID, map[string]string{ - "url": request.URL.String(), - "status": http.StatusText(response.StatusCode), - })) - } - if response.Request == nil { - response.Request = request - } - if response.Ctx == nil { - response.Ctx = request.Ctx - } - for _, f := range c.errorCallbacks { - f(response, err) - } - return err -} - -func (c *Collector) handleOnScraped(r *Response) { - if c.debugger != nil { - c.debugger.Event(createEvent("scraped", r.Request.ID, c.ID, map[string]string{ - "url": r.Request.URL.String(), - })) - } - for _, f := range c.scrapedCallbacks { - f(r) - } -} - -// Limit adds a new LimitRule to the collector -func (c *Collector) Limit(rule *LimitRule) error { - return c.backend.Limit(rule) -} - -// Limits adds new LimitRules to the collector -func (c *Collector) Limits(rules []*LimitRule) error { - return c.backend.Limits(rules) -} - -// SetRedirectHandler instructs the Collector to allow multiple downloads of the same URL -func (c *Collector) SetRedirectHandler(f func(req *http.Request, via []*http.Request) error) { - c.redirectHandler = f - c.backend.Client.CheckRedirect = c.checkRedirectFunc() -} - -// SetCookies handles the receipt of the cookies in a reply for the given URL -func (c *Collector) SetCookies(URL string, cookies []*http.Cookie) error { - if c.backend.Client.Jar == nil { - return ErrNoCookieJar - } - u, err := url.Parse(URL) - if err != nil { - return err - } - c.backend.Client.Jar.SetCookies(u, cookies) - return nil -} - -// Cookies returns the cookies to send in a request for the given URL. -func (c *Collector) Cookies(URL string) []*http.Cookie { - if c.backend.Client.Jar == nil { - return nil - } - u, err := url.Parse(URL) - if err != nil { - return nil - } - return c.backend.Client.Jar.Cookies(u) -} - -// Clone creates an exact copy of a Collector without callbacks. -// HTTP backend, robots.txt cache and cookie jar are shared -// between collectors. -func (c *Collector) Clone() *Collector { - return &Collector{ - AllowedDomains: c.AllowedDomains, - AllowURLRevisit: c.AllowURLRevisit, - CacheDir: c.CacheDir, - DetectCharset: c.DetectCharset, - DisallowedDomains: c.DisallowedDomains, - ID: atomic.AddUint32(&collectorCounter, 1), - IgnoreRobotsTxt: c.IgnoreRobotsTxt, - MaxBodySize: c.MaxBodySize, - MaxDepth: c.MaxDepth, - DisallowedURLFilters: c.DisallowedURLFilters, - URLFilters: c.URLFilters, - CheckHead: c.CheckHead, - ParseHTTPErrorResponse: c.ParseHTTPErrorResponse, - UserAgent: c.UserAgent, - TraceHTTP: c.TraceHTTP, - store: c.store, - backend: c.backend, - debugger: c.debugger, - Async: c.Async, - redirectHandler: c.redirectHandler, - errorCallbacks: make([]ErrorCallback, 0, 8), - htmlCallbacks: make([]*htmlCallbackContainer, 0, 8), - xmlCallbacks: make([]*xmlCallbackContainer, 0, 8), - scrapedCallbacks: make([]ScrapedCallback, 0, 8), - lock: c.lock, - requestCallbacks: make([]RequestCallback, 0, 8), - responseCallbacks: make([]ResponseCallback, 0, 8), - robotsMap: c.robotsMap, - wg: &sync.WaitGroup{}, - } -} - -func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Request) error { - return func(req *http.Request, via []*http.Request) error { - if !c.isDomainAllowed(req.URL.Hostname()) { - return fmt.Errorf("Not following redirect to %s because its not in AllowedDomains", req.URL.Host) - } - - if c.redirectHandler != nil { - return c.redirectHandler(req, via) - } - - // Honor golangs default of maximum of 10 redirects - if len(via) >= 10 { - return http.ErrUseLastResponse - } - - lastRequest := via[len(via)-1] - - // If domain has changed, remove the Authorization-header if it exists - if req.URL.Host != lastRequest.URL.Host { - req.Header.Del("Authorization") - } - - return nil - } -} - -func (c *Collector) parseSettingsFromEnv() { - for _, e := range os.Environ() { - if !strings.HasPrefix(e, "COLLY_") { - continue - } - pair := strings.SplitN(e[6:], "=", 2) - if f, ok := envMap[pair[0]]; ok { - f(c, pair[1]) - } else { - log.Println("Unknown environment variable:", pair[0]) - } - } -} - -func (c *Collector) checkHasVisited(URL string, requestData map[string]string) (bool, error) { - h := fnv.New64a() - h.Write([]byte(URL)) - - if requestData != nil { - h.Write(streamToByte(createFormReader(requestData))) - } - - return c.store.IsVisited(h.Sum64()) -} - -// SanitizeFileName replaces dangerous characters in a string -// so the return value can be used as a safe file name. -func SanitizeFileName(fileName string) string { - ext := filepath.Ext(fileName) - cleanExt := sanitize.BaseName(ext) - if cleanExt == "" { - cleanExt = ".unknown" - } - return strings.Replace(fmt.Sprintf( - "%s.%s", - sanitize.BaseName(fileName[:len(fileName)-len(ext)]), - cleanExt[1:], - ), "-", "_", -1) -} - -func createFormReader(data map[string]string) io.Reader { - form := url.Values{} - for k, v := range data { - form.Add(k, v) - } - return strings.NewReader(form.Encode()) -} - -func createMultipartReader(boundary string, data map[string][]byte) io.Reader { - dashBoundary := "--" + boundary - - body := []byte{} - buffer := bytes.NewBuffer(body) - - buffer.WriteString("Content-type: multipart/form-data; boundary=" + boundary + "\n\n") - for contentType, content := range data { - buffer.WriteString(dashBoundary + "\n") - buffer.WriteString("Content-Disposition: form-data; name=" + contentType + "\n") - buffer.WriteString(fmt.Sprintf("Content-Length: %d \n\n", len(content))) - buffer.Write(content) - buffer.WriteString("\n") - } - buffer.WriteString(dashBoundary + "--\n\n") - return buffer -} - -// randomBoundary was borrowed from -// github.com/golang/go/mime/multipart/writer.go#randomBoundary -func randomBoundary() string { - var buf [30]byte - _, err := io.ReadFull(rand.Reader, buf[:]) - if err != nil { - panic(err) - } - return fmt.Sprintf("%x", buf[:]) -} - -func isYesString(s string) bool { - switch strings.ToLower(s) { - case "1", "yes", "true", "y": - return true - } - return false -} - -func createJar(s storage.Storage) http.CookieJar { - return &cookieJarSerializer{store: s, lock: &sync.RWMutex{}} -} - -func (j *cookieJarSerializer) SetCookies(u *url.URL, cookies []*http.Cookie) { - j.lock.Lock() - defer j.lock.Unlock() - cookieStr := j.store.Cookies(u) - - // Merge existing cookies, new cookies have precedence. - cnew := make([]*http.Cookie, len(cookies)) - copy(cnew, cookies) - existing := storage.UnstringifyCookies(cookieStr) - for _, c := range existing { - if !storage.ContainsCookie(cnew, c.Name) { - cnew = append(cnew, c) - } - } - j.store.SetCookies(u, storage.StringifyCookies(cnew)) -} - -func (j *cookieJarSerializer) Cookies(u *url.URL) []*http.Cookie { - cookies := storage.UnstringifyCookies(j.store.Cookies(u)) - // Filter. - now := time.Now() - cnew := make([]*http.Cookie, 0, len(cookies)) - for _, c := range cookies { - // Drop expired cookies. - if c.RawExpires != "" && c.Expires.Before(now) { - continue - } - // Drop secure cookies if not over https. - if c.Secure && u.Scheme != "https" { - continue - } - cnew = append(cnew, c) - } - return cnew -} - -func isMatchingFilter(fs []*regexp.Regexp, d []byte) bool { - for _, r := range fs { - if r.Match(d) { - return true - } - } - return false -} - -func streamToByte(r io.Reader) []byte { - buf := new(bytes.Buffer) - buf.ReadFrom(r) - - if strReader, k := r.(*strings.Reader); k { - strReader.Seek(0, 0) - } else if bReader, kb := r.(*bytes.Reader); kb { - bReader.Seek(0, 0) - } - - return buf.Bytes() -} diff --git a/vendor/github.com/gocolly/colly/v2/context.go b/vendor/github.com/gocolly/colly/v2/context.go deleted file mode 100644 index 4bc11b9..0000000 --- a/vendor/github.com/gocolly/colly/v2/context.go +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "sync" -) - -// Context provides a tiny layer for passing data between callbacks -type Context struct { - contextMap map[string]interface{} - lock *sync.RWMutex -} - -// NewContext initializes a new Context instance -func NewContext() *Context { - return &Context{ - contextMap: make(map[string]interface{}), - lock: &sync.RWMutex{}, - } -} - -// UnmarshalBinary decodes Context value to nil -// This function is used by request caching -func (c *Context) UnmarshalBinary(_ []byte) error { - return nil -} - -// MarshalBinary encodes Context value -// This function is used by request caching -func (c *Context) MarshalBinary() (_ []byte, _ error) { - return nil, nil -} - -// Put stores a value of any type in Context -func (c *Context) Put(key string, value interface{}) { - c.lock.Lock() - c.contextMap[key] = value - c.lock.Unlock() -} - -// Get retrieves a string value from Context. -// Get returns an empty string if key not found -func (c *Context) Get(key string) string { - c.lock.RLock() - defer c.lock.RUnlock() - if v, ok := c.contextMap[key]; ok { - return v.(string) - } - return "" -} - -// GetAny retrieves a value from Context. -// GetAny returns nil if key not found -func (c *Context) GetAny(key string) interface{} { - c.lock.RLock() - defer c.lock.RUnlock() - if v, ok := c.contextMap[key]; ok { - return v - } - return nil -} - -// ForEach iterate context -func (c *Context) ForEach(fn func(k string, v interface{}) interface{}) []interface{} { - c.lock.RLock() - defer c.lock.RUnlock() - - ret := make([]interface{}, 0, len(c.contextMap)) - for k, v := range c.contextMap { - ret = append(ret, fn(k, v)) - } - - return ret -} diff --git a/vendor/github.com/gocolly/colly/v2/debug/BUILD.bazel b/vendor/github.com/gocolly/colly/v2/debug/BUILD.bazel deleted file mode 100644 index fc08084..0000000 --- a/vendor/github.com/gocolly/colly/v2/debug/BUILD.bazel +++ /dev/null @@ -1,13 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "debug", - srcs = [ - "debug.go", - "logdebugger.go", - "webdebugger.go", - ], - importmap = "peridot.resf.org/vendor/github.com/gocolly/colly/v2/debug", - importpath = "github.com/gocolly/colly/v2/debug", - visibility = ["//visibility:public"], -) diff --git a/vendor/github.com/gocolly/colly/v2/debug/debug.go b/vendor/github.com/gocolly/colly/v2/debug/debug.go deleted file mode 100644 index 705d0f7..0000000 --- a/vendor/github.com/gocolly/colly/v2/debug/debug.go +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -// Event represents an action inside a collector -type Event struct { - // Type is the type of the event - Type string - // RequestID identifies the HTTP request of the Event - RequestID uint32 - // CollectorID identifies the collector of the Event - CollectorID uint32 - // Values contains the event's key-value pairs. Different type of events - // can return different key-value pairs - Values map[string]string -} - -// Debugger is an interface for different type of debugging backends -type Debugger interface { - // Init initializes the backend - Init() error - // Event receives a new collector event. - Event(e *Event) -} diff --git a/vendor/github.com/gocolly/colly/v2/debug/logdebugger.go b/vendor/github.com/gocolly/colly/v2/debug/logdebugger.go deleted file mode 100644 index f866b6d..0000000 --- a/vendor/github.com/gocolly/colly/v2/debug/logdebugger.go +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -import ( - "io" - "log" - "os" - "sync/atomic" - "time" -) - -// LogDebugger is the simplest debugger which prints log messages to the STDERR -type LogDebugger struct { - // Output is the log destination, anything can be used which implements them - // io.Writer interface. Leave it blank to use STDERR - Output io.Writer - // Prefix appears at the beginning of each generated log line - Prefix string - // Flag defines the logging properties. - Flag int - logger *log.Logger - counter int32 - start time.Time -} - -// Init initializes the LogDebugger -func (l *LogDebugger) Init() error { - l.counter = 0 - l.start = time.Now() - if l.Output == nil { - l.Output = os.Stderr - } - l.logger = log.New(l.Output, l.Prefix, l.Flag) - return nil -} - -// Event receives Collector events and prints them to STDERR -func (l *LogDebugger) Event(e *Event) { - i := atomic.AddInt32(&l.counter, 1) - l.logger.Printf("[%06d] %d [%6d - %s] %q (%s)\n", i, e.CollectorID, e.RequestID, e.Type, e.Values, time.Since(l.start)) -} diff --git a/vendor/github.com/gocolly/colly/v2/debug/webdebugger.go b/vendor/github.com/gocolly/colly/v2/debug/webdebugger.go deleted file mode 100644 index 504a9eb..0000000 --- a/vendor/github.com/gocolly/colly/v2/debug/webdebugger.go +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package debug - -import ( - "encoding/json" - "log" - "net/http" - "sync" - "time" -) - -// WebDebugger is a web based debuging frontend for colly -type WebDebugger struct { - // Address is the address of the web server. It is 127.0.0.1:7676 by default. - Address string - initialized bool - CurrentRequests map[uint32]requestInfo - RequestLog []requestInfo - sync.Mutex -} - -type requestInfo struct { - URL string - Started time.Time - Duration time.Duration - ResponseStatus string - ID uint32 - CollectorID uint32 -} - -// Init initializes the WebDebugger -func (w *WebDebugger) Init() error { - if w.initialized { - return nil - } - defer func() { - w.initialized = true - }() - if w.Address == "" { - w.Address = "127.0.0.1:7676" - } - w.RequestLog = make([]requestInfo, 0) - w.CurrentRequests = make(map[uint32]requestInfo) - http.HandleFunc("/", w.indexHandler) - http.HandleFunc("/status", w.statusHandler) - log.Println("Starting debug webserver on", w.Address) - go http.ListenAndServe(w.Address, nil) - return nil -} - -// Event updates the debugger's status -func (w *WebDebugger) Event(e *Event) { - w.Lock() - defer w.Unlock() - - switch e.Type { - case "request": - w.CurrentRequests[e.RequestID] = requestInfo{ - URL: e.Values["url"], - Started: time.Now(), - ID: e.RequestID, - CollectorID: e.CollectorID, - } - case "response", "error": - r := w.CurrentRequests[e.RequestID] - r.Duration = time.Since(r.Started) - r.ResponseStatus = e.Values["status"] - w.RequestLog = append(w.RequestLog, r) - delete(w.CurrentRequests, e.RequestID) - } -} - -func (w *WebDebugger) indexHandler(wr http.ResponseWriter, r *http.Request) { - wr.Write([]byte(` - - - Colly Debugger WebUI - - - - - -
-
-
-

Current Requests

-
-
-
-

Finished Requests

-
-
-
-
- - - -`)) -} - -func (w *WebDebugger) statusHandler(wr http.ResponseWriter, r *http.Request) { - w.Lock() - jsonData, err := json.MarshalIndent(w, "", " ") - w.Unlock() - if err != nil { - panic(err) - } - wr.Write(jsonData) -} diff --git a/vendor/github.com/gocolly/colly/v2/go.mod b/vendor/github.com/gocolly/colly/v2/go.mod deleted file mode 100644 index ecc4025..0000000 --- a/vendor/github.com/gocolly/colly/v2/go.mod +++ /dev/null @@ -1,23 +0,0 @@ -module github.com/gocolly/colly/v2 - -go 1.12 - -require ( - github.com/PuerkitoBio/goquery v1.5.1 - github.com/andybalholm/cascadia v1.2.0 // indirect - github.com/antchfx/htmlquery v1.2.3 - github.com/antchfx/xmlquery v1.2.4 - github.com/antchfx/xpath v1.1.8 // indirect - github.com/gobwas/glob v0.2.3 - github.com/gocolly/colly v1.2.0 - github.com/golang/protobuf v1.4.2 // indirect - github.com/jawher/mow.cli v1.1.0 - github.com/kennygrant/sanitize v1.2.4 - github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca - github.com/temoto/robotstxt v1.1.1 - golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 // indirect - golang.org/x/net v0.0.0-20200602114024-627f9648deb9 - golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b // indirect - google.golang.org/appengine v1.6.6 - google.golang.org/protobuf v1.24.0 // indirect -) diff --git a/vendor/github.com/gocolly/colly/v2/go.sum b/vendor/github.com/gocolly/colly/v2/go.sum deleted file mode 100644 index 89f33e0..0000000 --- a/vendor/github.com/gocolly/colly/v2/go.sum +++ /dev/null @@ -1,134 +0,0 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= -github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= -github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= -github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= -github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= -github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= -github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= -github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA= -github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8= -github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M= -github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= -github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM= -github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk= -github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4= -github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM= -github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk= -github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/antchfx/xpath v1.1.8 h1:PcL6bIX42Px5usSx6xRYw/wjB3wYGkj0MJ9MBzEKVgk= -github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= -github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= -github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= -github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= -github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= -github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/jawher/mow.cli v1.1.0 h1:NdtHXRc0CwZQ507wMvQ/IS+Q3W3x2fycn973/b8Zuk8= -github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= -github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= -github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= -github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA= -github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80 h1:Ao/3l156eZf2AW5wK8a7/smtodRU+gha3+BeqJ69lRk= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM= -golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= -golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0 h1:UhZDfRO8JRQru4/+LlLE0BRKGF8L+PICnvYZmx/fEGA= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/vendor/github.com/gocolly/colly/v2/htmlelement.go b/vendor/github.com/gocolly/colly/v2/htmlelement.go deleted file mode 100644 index 7128949..0000000 --- a/vendor/github.com/gocolly/colly/v2/htmlelement.go +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "strings" - - "github.com/PuerkitoBio/goquery" - "golang.org/x/net/html" -) - -// HTMLElement is the representation of a HTML tag. -type HTMLElement struct { - // Name is the name of the tag - Name string - Text string - attributes []html.Attribute - // Request is the request object of the element's HTML document - Request *Request - // Response is the Response object of the element's HTML document - Response *Response - // DOM is the goquery parsed DOM object of the page. DOM is relative - // to the current HTMLElement - DOM *goquery.Selection - // Index stores the position of the current element within all the elements matched by an OnHTML callback - Index int -} - -// NewHTMLElementFromSelectionNode creates a HTMLElement from a goquery.Selection Node. -func NewHTMLElementFromSelectionNode(resp *Response, s *goquery.Selection, n *html.Node, idx int) *HTMLElement { - return &HTMLElement{ - Name: n.Data, - Request: resp.Request, - Response: resp, - Text: goquery.NewDocumentFromNode(n).Text(), - DOM: s, - Index: idx, - attributes: n.Attr, - } -} - -// Attr returns the selected attribute of a HTMLElement or empty string -// if no attribute found -func (h *HTMLElement) Attr(k string) string { - for _, a := range h.attributes { - if a.Key == k { - return a.Val - } - } - return "" -} - -// ChildText returns the concatenated and stripped text content of the matching -// elements. -func (h *HTMLElement) ChildText(goquerySelector string) string { - return strings.TrimSpace(h.DOM.Find(goquerySelector).Text()) -} - -// ChildTexts returns the stripped text content of all the matching -// elements. -func (h *HTMLElement) ChildTexts(goquerySelector string) []string { - var res []string - h.DOM.Find(goquerySelector).Each(func(_ int, s *goquery.Selection) { - - res = append(res, strings.TrimSpace(s.Text())) - }) - return res -} - -// ChildAttr returns the stripped text content of the first matching -// element's attribute. -func (h *HTMLElement) ChildAttr(goquerySelector, attrName string) string { - if attr, ok := h.DOM.Find(goquerySelector).Attr(attrName); ok { - return strings.TrimSpace(attr) - } - return "" -} - -// ChildAttrs returns the stripped text content of all the matching -// element's attributes. -func (h *HTMLElement) ChildAttrs(goquerySelector, attrName string) []string { - var res []string - h.DOM.Find(goquerySelector).Each(func(_ int, s *goquery.Selection) { - if attr, ok := s.Attr(attrName); ok { - res = append(res, strings.TrimSpace(attr)) - } - }) - return res -} - -// ForEach iterates over the elements matched by the first argument -// and calls the callback function on every HTMLElement match. -func (h *HTMLElement) ForEach(goquerySelector string, callback func(int, *HTMLElement)) { - i := 0 - h.DOM.Find(goquerySelector).Each(func(_ int, s *goquery.Selection) { - for _, n := range s.Nodes { - callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n, i)) - i++ - } - }) -} - -// ForEachWithBreak iterates over the elements matched by the first argument -// and calls the callback function on every HTMLElement match. -// It is identical to ForEach except that it is possible to break -// out of the loop by returning false in the callback function. It returns the -// current Selection object. -func (h *HTMLElement) ForEachWithBreak(goquerySelector string, callback func(int, *HTMLElement) bool) { - i := 0 - h.DOM.Find(goquerySelector).EachWithBreak(func(_ int, s *goquery.Selection) bool { - for _, n := range s.Nodes { - if callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n, i)) { - i++ - return true - } - } - return false - }) -} diff --git a/vendor/github.com/gocolly/colly/v2/http_backend.go b/vendor/github.com/gocolly/colly/v2/http_backend.go deleted file mode 100644 index fe96c9a..0000000 --- a/vendor/github.com/gocolly/colly/v2/http_backend.go +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "crypto/sha1" - "encoding/gob" - "encoding/hex" - "io" - "io/ioutil" - "math/rand" - "net/http" - "os" - "path" - "regexp" - "strings" - "sync" - "time" - - "compress/gzip" - - "github.com/gobwas/glob" -) - -type httpBackend struct { - LimitRules []*LimitRule - Client *http.Client - lock *sync.RWMutex -} - -type checkHeadersFunc func(statusCode int, header http.Header) bool - -// LimitRule provides connection restrictions for domains. -// Both DomainRegexp and DomainGlob can be used to specify -// the included domains patterns, but at least one is required. -// There can be two kind of limitations: -// - Parallelism: Set limit for the number of concurrent requests to matching domains -// - Delay: Wait specified amount of time between requests (parallelism is 1 in this case) -type LimitRule struct { - // DomainRegexp is a regular expression to match against domains - DomainRegexp string - // DomainGlob is a glob pattern to match against domains - DomainGlob string - // Delay is the duration to wait before creating a new request to the matching domains - Delay time.Duration - // RandomDelay is the extra randomized duration to wait added to Delay before creating a new request - RandomDelay time.Duration - // Parallelism is the number of the maximum allowed concurrent requests of the matching domains - Parallelism int - waitChan chan bool - compiledRegexp *regexp.Regexp - compiledGlob glob.Glob -} - -// Init initializes the private members of LimitRule -func (r *LimitRule) Init() error { - waitChanSize := 1 - if r.Parallelism > 1 { - waitChanSize = r.Parallelism - } - r.waitChan = make(chan bool, waitChanSize) - hasPattern := false - if r.DomainRegexp != "" { - c, err := regexp.Compile(r.DomainRegexp) - if err != nil { - return err - } - r.compiledRegexp = c - hasPattern = true - } - if r.DomainGlob != "" { - c, err := glob.Compile(r.DomainGlob) - if err != nil { - return err - } - r.compiledGlob = c - hasPattern = true - } - if !hasPattern { - return ErrNoPattern - } - return nil -} - -func (h *httpBackend) Init(jar http.CookieJar) { - rand.Seed(time.Now().UnixNano()) - h.Client = &http.Client{ - Jar: jar, - Timeout: 10 * time.Second, - } - h.lock = &sync.RWMutex{} -} - -// Match checks that the domain parameter triggers the rule -func (r *LimitRule) Match(domain string) bool { - match := false - if r.compiledRegexp != nil && r.compiledRegexp.MatchString(domain) { - match = true - } - if r.compiledGlob != nil && r.compiledGlob.Match(domain) { - match = true - } - return match -} - -func (h *httpBackend) GetMatchingRule(domain string) *LimitRule { - if h.LimitRules == nil { - return nil - } - h.lock.RLock() - defer h.lock.RUnlock() - for _, r := range h.LimitRules { - if r.Match(domain) { - return r - } - } - return nil -} - -func (h *httpBackend) Cache(request *http.Request, bodySize int, checkHeadersFunc checkHeadersFunc, cacheDir string) (*Response, error) { - if cacheDir == "" || request.Method != "GET" { - return h.Do(request, bodySize, checkHeadersFunc) - } - sum := sha1.Sum([]byte(request.URL.String())) - hash := hex.EncodeToString(sum[:]) - dir := path.Join(cacheDir, hash[:2]) - filename := path.Join(dir, hash) - if file, err := os.Open(filename); err == nil { - resp := new(Response) - err := gob.NewDecoder(file).Decode(resp) - file.Close() - if resp.StatusCode < 500 { - return resp, err - } - } - resp, err := h.Do(request, bodySize, checkHeadersFunc) - if err != nil || resp.StatusCode >= 500 { - return resp, err - } - if _, err := os.Stat(dir); err != nil { - if err := os.MkdirAll(dir, 0750); err != nil { - return resp, err - } - } - file, err := os.Create(filename + "~") - if err != nil { - return resp, err - } - if err := gob.NewEncoder(file).Encode(resp); err != nil { - file.Close() - return resp, err - } - file.Close() - return resp, os.Rename(filename+"~", filename) -} - -func (h *httpBackend) Do(request *http.Request, bodySize int, checkHeadersFunc checkHeadersFunc) (*Response, error) { - r := h.GetMatchingRule(request.URL.Host) - if r != nil { - r.waitChan <- true - defer func(r *LimitRule) { - randomDelay := time.Duration(0) - if r.RandomDelay != 0 { - randomDelay = time.Duration(rand.Int63n(int64(r.RandomDelay))) - } - time.Sleep(r.Delay + randomDelay) - <-r.waitChan - }(r) - } - - res, err := h.Client.Do(request) - if err != nil { - return nil, err - } - defer res.Body.Close() - if res.Request != nil { - *request = *res.Request - } - if !checkHeadersFunc(res.StatusCode, res.Header) { - // closing res.Body (see defer above) without reading it aborts - // the download - return nil, ErrAbortedAfterHeaders - } - - var bodyReader io.Reader = res.Body - if bodySize > 0 { - bodyReader = io.LimitReader(bodyReader, int64(bodySize)) - } - contentEncoding := strings.ToLower(res.Header.Get("Content-Encoding")) - if !res.Uncompressed && (strings.Contains(contentEncoding, "gzip") || (contentEncoding == "" && strings.Contains(strings.ToLower(res.Header.Get("Content-Type")), "gzip")) || strings.HasSuffix(strings.ToLower(request.URL.Path), ".xml.gz")) { - bodyReader, err = gzip.NewReader(bodyReader) - if err != nil { - return nil, err - } - defer bodyReader.(*gzip.Reader).Close() - } - body, err := ioutil.ReadAll(bodyReader) - if err != nil { - return nil, err - } - return &Response{ - StatusCode: res.StatusCode, - Body: body, - Headers: &res.Header, - }, nil -} - -func (h *httpBackend) Limit(rule *LimitRule) error { - h.lock.Lock() - if h.LimitRules == nil { - h.LimitRules = make([]*LimitRule, 0, 8) - } - h.LimitRules = append(h.LimitRules, rule) - h.lock.Unlock() - return rule.Init() -} - -func (h *httpBackend) Limits(rules []*LimitRule) error { - for _, r := range rules { - if err := h.Limit(r); err != nil { - return err - } - } - return nil -} diff --git a/vendor/github.com/gocolly/colly/v2/http_trace.go b/vendor/github.com/gocolly/colly/v2/http_trace.go deleted file mode 100644 index bcacbe3..0000000 --- a/vendor/github.com/gocolly/colly/v2/http_trace.go +++ /dev/null @@ -1,37 +0,0 @@ -package colly - -import ( - "net/http" - "net/http/httptrace" - "time" -) - -// HTTPTrace provides a datastructure for storing an http trace. -type HTTPTrace struct { - start, connect time.Time - ConnectDuration time.Duration - FirstByteDuration time.Duration -} - -// trace returns a httptrace.ClientTrace object to be used with an http -// request via httptrace.WithClientTrace() that fills in the HttpTrace. -func (ht *HTTPTrace) trace() *httptrace.ClientTrace { - trace := &httptrace.ClientTrace{ - ConnectStart: func(network, addr string) { ht.connect = time.Now() }, - ConnectDone: func(network, addr string, err error) { - ht.ConnectDuration = time.Since(ht.connect) - }, - - GetConn: func(hostPort string) { ht.start = time.Now() }, - GotFirstResponseByte: func() { - ht.FirstByteDuration = time.Since(ht.start) - }, - } - return trace -} - -// WithTrace returns the given HTTP Request with this HTTPTrace added to its -// context. -func (ht *HTTPTrace) WithTrace(req *http.Request) *http.Request { - return req.WithContext(httptrace.WithClientTrace(req.Context(), ht.trace())) -} diff --git a/vendor/github.com/gocolly/colly/v2/request.go b/vendor/github.com/gocolly/colly/v2/request.go deleted file mode 100644 index c2f6d2a..0000000 --- a/vendor/github.com/gocolly/colly/v2/request.go +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "bytes" - "encoding/json" - "io" - "io/ioutil" - "net/http" - "net/url" - "strings" - "sync/atomic" -) - -// Request is the representation of a HTTP request made by a Collector -type Request struct { - // URL is the parsed URL of the HTTP request - URL *url.URL - // Headers contains the Request's HTTP headers - Headers *http.Header - // Ctx is a context between a Request and a Response - Ctx *Context - // Depth is the number of the parents of the request - Depth int - // Method is the HTTP method of the request - Method string - // Body is the request body which is used on POST/PUT requests - Body io.Reader - // ResponseCharacterencoding is the character encoding of the response body. - // Leave it blank to allow automatic character encoding of the response body. - // It is empty by default and it can be set in OnRequest callback. - ResponseCharacterEncoding string - // ID is the Unique identifier of the request - ID uint32 - collector *Collector - abort bool - baseURL *url.URL - // ProxyURL is the proxy address that handles the request - ProxyURL string -} - -type serializableRequest struct { - URL string - Method string - Depth int - Body []byte - ID uint32 - Ctx map[string]interface{} - Headers http.Header -} - -// New creates a new request with the context of the original request -func (r *Request) New(method, URL string, body io.Reader) (*Request, error) { - u, err := url.Parse(URL) - if err != nil { - return nil, err - } - return &Request{ - Method: method, - URL: u, - Body: body, - Ctx: r.Ctx, - Headers: &http.Header{}, - ID: atomic.AddUint32(&r.collector.requestCount, 1), - collector: r.collector, - }, nil -} - -// Abort cancels the HTTP request when called in an OnRequest callback -func (r *Request) Abort() { - r.abort = true -} - -// AbsoluteURL returns with the resolved absolute URL of an URL chunk. -// AbsoluteURL returns empty string if the URL chunk is a fragment or -// could not be parsed -func (r *Request) AbsoluteURL(u string) string { - if strings.HasPrefix(u, "#") { - return "" - } - var base *url.URL - if r.baseURL != nil { - base = r.baseURL - } else { - base = r.URL - } - absURL, err := base.Parse(u) - if err != nil { - return "" - } - absURL.Fragment = "" - if absURL.Scheme == "//" { - absURL.Scheme = r.URL.Scheme - } - return absURL.String() -} - -// Visit continues Collector's collecting job by creating a -// request and preserves the Context of the previous request. -// Visit also calls the previously provided callbacks -func (r *Request) Visit(URL string) error { - return r.collector.scrape(r.AbsoluteURL(URL), "GET", r.Depth+1, nil, r.Ctx, nil, true) -} - -// HasVisited checks if the provided URL has been visited -func (r *Request) HasVisited(URL string) (bool, error) { - return r.collector.HasVisited(URL) -} - -// Post continues a collector job by creating a POST request and preserves the Context -// of the previous request. -// Post also calls the previously provided callbacks -func (r *Request) Post(URL string, requestData map[string]string) error { - return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, createFormReader(requestData), r.Ctx, nil, true) -} - -// PostRaw starts a collector job by creating a POST request with raw binary data. -// PostRaw preserves the Context of the previous request -// and calls the previously provided callbacks -func (r *Request) PostRaw(URL string, requestData []byte) error { - return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, bytes.NewReader(requestData), r.Ctx, nil, true) -} - -// PostMultipart starts a collector job by creating a Multipart POST request -// with raw binary data. PostMultipart also calls the previously provided. -// callbacks -func (r *Request) PostMultipart(URL string, requestData map[string][]byte) error { - boundary := randomBoundary() - hdr := http.Header{} - hdr.Set("Content-Type", "multipart/form-data; boundary="+boundary) - hdr.Set("User-Agent", r.collector.UserAgent) - return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, createMultipartReader(boundary, requestData), r.Ctx, hdr, true) -} - -// Retry submits HTTP request again with the same parameters -func (r *Request) Retry() error { - r.Headers.Del("Cookie") - return r.collector.scrape(r.URL.String(), r.Method, r.Depth, r.Body, r.Ctx, *r.Headers, false) -} - -// Do submits the request -func (r *Request) Do() error { - return r.collector.scrape(r.URL.String(), r.Method, r.Depth, r.Body, r.Ctx, *r.Headers, !r.collector.AllowURLRevisit) -} - -// Marshal serializes the Request -func (r *Request) Marshal() ([]byte, error) { - ctx := make(map[string]interface{}) - if r.Ctx != nil { - r.Ctx.ForEach(func(k string, v interface{}) interface{} { - ctx[k] = v - return nil - }) - } - var err error - var body []byte - if r.Body != nil { - body, err = ioutil.ReadAll(r.Body) - if err != nil { - return nil, err - } - } - sr := &serializableRequest{ - URL: r.URL.String(), - Method: r.Method, - Depth: r.Depth, - Body: body, - ID: r.ID, - Ctx: ctx, - } - if r.Headers != nil { - sr.Headers = *r.Headers - } - return json.Marshal(sr) -} diff --git a/vendor/github.com/gocolly/colly/v2/response.go b/vendor/github.com/gocolly/colly/v2/response.go deleted file mode 100644 index 049d880..0000000 --- a/vendor/github.com/gocolly/colly/v2/response.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "bytes" - "fmt" - "io/ioutil" - "mime" - "net/http" - "strings" - - "github.com/saintfish/chardet" - "golang.org/x/net/html/charset" -) - -// Response is the representation of a HTTP response made by a Collector -type Response struct { - // StatusCode is the status code of the Response - StatusCode int - // Body is the content of the Response - Body []byte - // Ctx is a context between a Request and a Response - Ctx *Context - // Request is the Request object of the response - Request *Request - // Headers contains the Response's HTTP headers - Headers *http.Header - // Trace contains the HTTPTrace for the request. Will only be set by the - // collector if Collector.TraceHTTP is set to true. - Trace *HTTPTrace -} - -// Save writes response body to disk -func (r *Response) Save(fileName string) error { - return ioutil.WriteFile(fileName, r.Body, 0644) -} - -// FileName returns the sanitized file name parsed from "Content-Disposition" -// header or from URL -func (r *Response) FileName() string { - _, params, err := mime.ParseMediaType(r.Headers.Get("Content-Disposition")) - if fName, ok := params["filename"]; ok && err == nil { - return SanitizeFileName(fName) - } - if r.Request.URL.RawQuery != "" { - return SanitizeFileName(fmt.Sprintf("%s_%s", r.Request.URL.Path, r.Request.URL.RawQuery)) - } - return SanitizeFileName(strings.TrimPrefix(r.Request.URL.Path, "/")) -} - -func (r *Response) fixCharset(detectCharset bool, defaultEncoding string) error { - if len(r.Body) == 0 { - return nil - } - if defaultEncoding != "" { - tmpBody, err := encodeBytes(r.Body, "text/plain; charset="+defaultEncoding) - if err != nil { - return err - } - r.Body = tmpBody - return nil - } - contentType := strings.ToLower(r.Headers.Get("Content-Type")) - - if strings.Contains(contentType, "image/") || - strings.Contains(contentType, "video/") || - strings.Contains(contentType, "audio/") || - strings.Contains(contentType, "font/") { - // These MIME types should not have textual data. - - return nil - } - - if !strings.Contains(contentType, "charset") { - if !detectCharset { - return nil - } - d := chardet.NewTextDetector() - r, err := d.DetectBest(r.Body) - if err != nil { - return err - } - contentType = "text/plain; charset=" + r.Charset - } - if strings.Contains(contentType, "utf-8") || strings.Contains(contentType, "utf8") { - return nil - } - tmpBody, err := encodeBytes(r.Body, contentType) - if err != nil { - return err - } - r.Body = tmpBody - return nil -} - -func encodeBytes(b []byte, contentType string) ([]byte, error) { - r, err := charset.NewReader(bytes.NewReader(b), contentType) - if err != nil { - return nil, err - } - return ioutil.ReadAll(r) -} diff --git a/vendor/github.com/gocolly/colly/v2/storage/BUILD.bazel b/vendor/github.com/gocolly/colly/v2/storage/BUILD.bazel deleted file mode 100644 index 9ee4a09..0000000 --- a/vendor/github.com/gocolly/colly/v2/storage/BUILD.bazel +++ /dev/null @@ -1,9 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "storage", - srcs = ["storage.go"], - importmap = "peridot.resf.org/vendor/github.com/gocolly/colly/v2/storage", - importpath = "github.com/gocolly/colly/v2/storage", - visibility = ["//visibility:public"], -) diff --git a/vendor/github.com/gocolly/colly/v2/storage/storage.go b/vendor/github.com/gocolly/colly/v2/storage/storage.go deleted file mode 100644 index fcb0c0c..0000000 --- a/vendor/github.com/gocolly/colly/v2/storage/storage.go +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package storage - -import ( - "net/http" - "net/http/cookiejar" - "net/url" - "strings" - "sync" -) - -// Storage is an interface which handles Collector's internal data, -// like visited urls and cookies. -// The default Storage of the Collector is the InMemoryStorage. -// Collector's storage can be changed by calling Collector.SetStorage() -// function. -type Storage interface { - // Init initializes the storage - Init() error - // Visited receives and stores a request ID that is visited by the Collector - Visited(requestID uint64) error - // IsVisited returns true if the request was visited before IsVisited - // is called - IsVisited(requestID uint64) (bool, error) - // Cookies retrieves stored cookies for a given host - Cookies(u *url.URL) string - // SetCookies stores cookies for a given host - SetCookies(u *url.URL, cookies string) -} - -// InMemoryStorage is the default storage backend of colly. -// InMemoryStorage keeps cookies and visited urls in memory -// without persisting data on the disk. -type InMemoryStorage struct { - visitedURLs map[uint64]bool - lock *sync.RWMutex - jar *cookiejar.Jar -} - -// Init initializes InMemoryStorage -func (s *InMemoryStorage) Init() error { - if s.visitedURLs == nil { - s.visitedURLs = make(map[uint64]bool) - } - if s.lock == nil { - s.lock = &sync.RWMutex{} - } - if s.jar == nil { - var err error - s.jar, err = cookiejar.New(nil) - return err - } - return nil -} - -// Visited implements Storage.Visited() -func (s *InMemoryStorage) Visited(requestID uint64) error { - s.lock.Lock() - s.visitedURLs[requestID] = true - s.lock.Unlock() - return nil -} - -// IsVisited implements Storage.IsVisited() -func (s *InMemoryStorage) IsVisited(requestID uint64) (bool, error) { - s.lock.RLock() - visited := s.visitedURLs[requestID] - s.lock.RUnlock() - return visited, nil -} - -// Cookies implements Storage.Cookies() -func (s *InMemoryStorage) Cookies(u *url.URL) string { - return StringifyCookies(s.jar.Cookies(u)) -} - -// SetCookies implements Storage.SetCookies() -func (s *InMemoryStorage) SetCookies(u *url.URL, cookies string) { - s.jar.SetCookies(u, UnstringifyCookies(cookies)) -} - -// Close implements Storage.Close() -func (s *InMemoryStorage) Close() error { - return nil -} - -// StringifyCookies serializes list of http.Cookies to string -func StringifyCookies(cookies []*http.Cookie) string { - // Stringify cookies. - cs := make([]string, len(cookies)) - for i, c := range cookies { - cs[i] = c.String() - } - return strings.Join(cs, "\n") -} - -// UnstringifyCookies deserializes a cookie string to http.Cookies -func UnstringifyCookies(s string) []*http.Cookie { - h := http.Header{} - for _, c := range strings.Split(s, "\n") { - h.Add("Set-Cookie", c) - } - r := http.Response{Header: h} - return r.Cookies() -} - -// ContainsCookie checks if a cookie name is represented in cookies -func ContainsCookie(cookies []*http.Cookie, name string) bool { - for _, c := range cookies { - if c.Name == name { - return true - } - } - return false -} diff --git a/vendor/github.com/gocolly/colly/v2/unmarshal.go b/vendor/github.com/gocolly/colly/v2/unmarshal.go deleted file mode 100644 index 302f258..0000000 --- a/vendor/github.com/gocolly/colly/v2/unmarshal.go +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "errors" - "reflect" - "strings" - - "github.com/PuerkitoBio/goquery" -) - -// Unmarshal is a shorthand for colly.UnmarshalHTML -func (h *HTMLElement) Unmarshal(v interface{}) error { - return UnmarshalHTML(v, h.DOM, nil) -} - -// UnmarshalWithMap is a shorthand for colly.UnmarshalHTML, extended to allow maps to be passed in. -func (h *HTMLElement) UnmarshalWithMap(v interface{}, structMap map[string]string) error { - return UnmarshalHTML(v, h.DOM, structMap) -} - -// UnmarshalHTML declaratively extracts text or attributes to a struct from -// HTML response using struct tags composed of css selectors. -// Allowed struct tags: -// - "selector" (required): CSS (goquery) selector of the desired data -// - "attr" (optional): Selects the matching element's attribute's value. -// Leave it blank or omit to get the text of the element. -// -// Example struct declaration: -// -// type Nested struct { -// String string `selector:"div > p"` -// Classes []string `selector:"li" attr:"class"` -// Struct *Nested `selector:"div > div"` -// } -// -// Supported types: struct, *struct, string, []string -func UnmarshalHTML(v interface{}, s *goquery.Selection, structMap map[string]string) error { - rv := reflect.ValueOf(v) - - if rv.Kind() != reflect.Ptr || rv.IsNil() { - return errors.New("Invalid type or nil-pointer") - } - - sv := rv.Elem() - st := reflect.TypeOf(v).Elem() - if structMap != nil { - for k, v := range structMap { - attrV := sv.FieldByName(k) - if !attrV.CanAddr() || !attrV.CanSet() { - continue - } - if err := unmarshalSelector(s, attrV, v); err != nil { - return err - } - } - } else { - for i := 0; i < sv.NumField(); i++ { - attrV := sv.Field(i) - if !attrV.CanAddr() || !attrV.CanSet() { - continue - } - if err := unmarshalAttr(s, attrV, st.Field(i)); err != nil { - return err - } - - } - } - - return nil -} - -func unmarshalSelector(s *goquery.Selection, attrV reflect.Value, selector string) error { - //selector is "-" specify that field should ignore. - if selector == "-" { - return nil - } - htmlAttr := "" - // TODO support more types - switch attrV.Kind() { - case reflect.Slice: - if err := unmarshalSlice(s, selector, htmlAttr, attrV); err != nil { - return err - } - case reflect.String: - val := getDOMValue(s.Find(selector), htmlAttr) - attrV.Set(reflect.Indirect(reflect.ValueOf(val))) - case reflect.Struct: - if err := unmarshalStruct(s, selector, attrV); err != nil { - return err - } - case reflect.Ptr: - if err := unmarshalPtr(s, selector, attrV); err != nil { - return err - } - default: - return errors.New("Invalid type: " + attrV.String()) - } - return nil -} - -func unmarshalAttr(s *goquery.Selection, attrV reflect.Value, attrT reflect.StructField) error { - selector := attrT.Tag.Get("selector") - //selector is "-" specify that field should ignore. - if selector == "-" { - return nil - } - htmlAttr := attrT.Tag.Get("attr") - // TODO support more types - switch attrV.Kind() { - case reflect.Slice: - if err := unmarshalSlice(s, selector, htmlAttr, attrV); err != nil { - return err - } - case reflect.String: - val := getDOMValue(s.Find(selector), htmlAttr) - attrV.Set(reflect.Indirect(reflect.ValueOf(val))) - case reflect.Struct: - if err := unmarshalStruct(s, selector, attrV); err != nil { - return err - } - case reflect.Ptr: - if err := unmarshalPtr(s, selector, attrV); err != nil { - return err - } - default: - return errors.New("Invalid type: " + attrV.String()) - } - return nil -} - -func unmarshalStruct(s *goquery.Selection, selector string, attrV reflect.Value) error { - newS := s - if selector != "" { - newS = newS.Find(selector) - } - if newS.Nodes == nil { - return nil - } - v := reflect.New(attrV.Type()) - err := UnmarshalHTML(v.Interface(), newS, nil) - if err != nil { - return err - } - attrV.Set(reflect.Indirect(v)) - return nil -} - -func unmarshalPtr(s *goquery.Selection, selector string, attrV reflect.Value) error { - newS := s - if selector != "" { - newS = newS.Find(selector) - } - if newS.Nodes == nil { - return nil - } - e := attrV.Type().Elem() - if e.Kind() != reflect.Struct { - return errors.New("Invalid slice type") - } - v := reflect.New(e) - err := UnmarshalHTML(v.Interface(), newS, nil) - if err != nil { - return err - } - attrV.Set(v) - return nil -} - -func unmarshalSlice(s *goquery.Selection, selector, htmlAttr string, attrV reflect.Value) error { - if attrV.Pointer() == 0 { - v := reflect.MakeSlice(attrV.Type(), 0, 0) - attrV.Set(v) - } - switch attrV.Type().Elem().Kind() { - case reflect.String: - s.Find(selector).Each(func(_ int, s *goquery.Selection) { - val := getDOMValue(s, htmlAttr) - attrV.Set(reflect.Append(attrV, reflect.Indirect(reflect.ValueOf(val)))) - }) - case reflect.Ptr: - s.Find(selector).Each(func(_ int, innerSel *goquery.Selection) { - someVal := reflect.New(attrV.Type().Elem().Elem()) - UnmarshalHTML(someVal.Interface(), innerSel, nil) - attrV.Set(reflect.Append(attrV, someVal)) - }) - case reflect.Struct: - s.Find(selector).Each(func(_ int, innerSel *goquery.Selection) { - someVal := reflect.New(attrV.Type().Elem()) - UnmarshalHTML(someVal.Interface(), innerSel, nil) - attrV.Set(reflect.Append(attrV, reflect.Indirect(someVal))) - }) - default: - return errors.New("Invalid slice type") - } - return nil -} - -func getDOMValue(s *goquery.Selection, attr string) string { - if attr == "" { - return strings.TrimSpace(s.First().Text()) - } - attrV, _ := s.Attr(attr) - return attrV -} diff --git a/vendor/github.com/gocolly/colly/v2/xmlelement.go b/vendor/github.com/gocolly/colly/v2/xmlelement.go deleted file mode 100644 index 7ff5fe5..0000000 --- a/vendor/github.com/gocolly/colly/v2/xmlelement.go +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2018 Adam Tauber -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package colly - -import ( - "encoding/xml" - "strings" - - "github.com/antchfx/htmlquery" - "github.com/antchfx/xmlquery" - "golang.org/x/net/html" -) - -// XMLElement is the representation of a XML tag. -type XMLElement struct { - // Name is the name of the tag - Name string - Text string - attributes interface{} - // Request is the request object of the element's HTML document - Request *Request - // Response is the Response object of the element's HTML document - Response *Response - // DOM is the DOM object of the page. DOM is relative - // to the current XMLElement and is either a html.Node or xmlquery.Node - // based on how the XMLElement was created. - DOM interface{} - isHTML bool -} - -// NewXMLElementFromHTMLNode creates a XMLElement from a html.Node. -func NewXMLElementFromHTMLNode(resp *Response, s *html.Node) *XMLElement { - return &XMLElement{ - Name: s.Data, - Request: resp.Request, - Response: resp, - Text: htmlquery.InnerText(s), - DOM: s, - attributes: s.Attr, - isHTML: true, - } -} - -// NewXMLElementFromXMLNode creates a XMLElement from a xmlquery.Node. -func NewXMLElementFromXMLNode(resp *Response, s *xmlquery.Node) *XMLElement { - return &XMLElement{ - Name: s.Data, - Request: resp.Request, - Response: resp, - Text: s.InnerText(), - DOM: s, - attributes: s.Attr, - isHTML: false, - } -} - -// Attr returns the selected attribute of a HTMLElement or empty string -// if no attribute found -func (h *XMLElement) Attr(k string) string { - if h.isHTML { - for _, a := range h.attributes.([]html.Attribute) { - if a.Key == k { - return a.Val - } - } - } else { - for _, a := range h.attributes.([]xml.Attr) { - if a.Name.Local == k { - return a.Value - } - } - } - return "" -} - -// ChildText returns the concatenated and stripped text content of the matching -// elements. -func (h *XMLElement) ChildText(xpathQuery string) string { - if h.isHTML { - child := htmlquery.FindOne(h.DOM.(*html.Node), xpathQuery) - if child == nil { - return "" - } - return strings.TrimSpace(htmlquery.InnerText(child)) - } - child := xmlquery.FindOne(h.DOM.(*xmlquery.Node), xpathQuery) - if child == nil { - return "" - } - return strings.TrimSpace(child.InnerText()) - -} - -// ChildAttr returns the stripped text content of the first matching -// element's attribute. -func (h *XMLElement) ChildAttr(xpathQuery, attrName string) string { - if h.isHTML { - child := htmlquery.FindOne(h.DOM.(*html.Node), xpathQuery) - if child != nil { - for _, attr := range child.Attr { - if attr.Key == attrName { - return strings.TrimSpace(attr.Val) - } - } - } - } else { - child := xmlquery.FindOne(h.DOM.(*xmlquery.Node), xpathQuery) - if child != nil { - for _, attr := range child.Attr { - if attr.Name.Local == attrName { - return strings.TrimSpace(attr.Value) - } - } - } - } - - return "" -} - -// ChildAttrs returns the stripped text content of all the matching -// element's attributes. -func (h *XMLElement) ChildAttrs(xpathQuery, attrName string) []string { - var res []string - if h.isHTML { - for _, child := range htmlquery.Find(h.DOM.(*html.Node), xpathQuery) { - for _, attr := range child.Attr { - if attr.Key == attrName { - res = append(res, strings.TrimSpace(attr.Val)) - } - } - } - } else { - xmlquery.FindEach(h.DOM.(*xmlquery.Node), xpathQuery, func(i int, child *xmlquery.Node) { - for _, attr := range child.Attr { - if attr.Name.Local == attrName { - res = append(res, strings.TrimSpace(attr.Value)) - } - } - }) - } - return res -} - -// ChildTexts returns an array of strings corresponding to child elements that match the xpath query. -// Each item in the array is the stripped text content of the corresponding matching child element. -func (h *XMLElement) ChildTexts(xpathQuery string) []string { - texts := make([]string, 0) - if h.isHTML { - for _, child := range htmlquery.Find(h.DOM.(*html.Node), xpathQuery) { - texts = append(texts, strings.TrimSpace(htmlquery.InnerText(child))) - } - } else { - xmlquery.FindEach(h.DOM.(*xmlquery.Node), xpathQuery, func(i int, child *xmlquery.Node) { - texts = append(texts, strings.TrimSpace(child.InnerText())) - }) - } - return texts -} diff --git a/vendor/github.com/gorilla/feeds/.travis.yml b/vendor/github.com/gorilla/feeds/.travis.yml deleted file mode 100644 index 7939a21..0000000 --- a/vendor/github.com/gorilla/feeds/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -language: go -sudo: false -matrix: - include: - - go: 1.8 - - go: 1.9 - - go: "1.10" - - go: 1.x - - go: tip - allow_failures: - - go: tip -script: - - go get -t -v ./... - - diff -u <(echo -n) <(gofmt -d -s .) - - go vet . - - go test -v -race ./... diff --git a/vendor/github.com/gorilla/feeds/AUTHORS b/vendor/github.com/gorilla/feeds/AUTHORS deleted file mode 100644 index 2c28cf9..0000000 --- a/vendor/github.com/gorilla/feeds/AUTHORS +++ /dev/null @@ -1,29 +0,0 @@ -# This is the official list of gorilla/feeds authors for copyright purposes. -# Please keep the list sorted. - -Dmitry Chestnykh -Eddie Scholtz -Gabriel Simmer -Google LLC (https://opensource.google.com/) -honky -James Gregory -Jason Hall -Jason Moiron -Kamil Kisiel -Kevin Stock -Markus Zimmermann -Matt Silverlock -Matthew Dawson -Milan Aleksic -Milan Aleksić -nlimpid -Paul Petring -Sean Enck -Sue Spence -Supermighty -Toru Fukui -Vabd -Volker -ZhiFeng Hu -weberc2 - diff --git a/vendor/github.com/gorilla/feeds/BUILD.bazel b/vendor/github.com/gorilla/feeds/BUILD.bazel deleted file mode 100644 index de78e1e..0000000 --- a/vendor/github.com/gorilla/feeds/BUILD.bazel +++ /dev/null @@ -1,16 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "feeds", - srcs = [ - "atom.go", - "doc.go", - "feed.go", - "json.go", - "rss.go", - "uuid.go", - ], - importmap = "peridot.resf.org/vendor/github.com/gorilla/feeds", - importpath = "github.com/gorilla/feeds", - visibility = ["//visibility:public"], -) diff --git a/vendor/github.com/gorilla/feeds/LICENSE b/vendor/github.com/gorilla/feeds/LICENSE deleted file mode 100644 index e24412d..0000000 --- a/vendor/github.com/gorilla/feeds/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2013-2018 The Gorilla Feeds Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/gorilla/feeds/README.md b/vendor/github.com/gorilla/feeds/README.md deleted file mode 100644 index 4d733cf..0000000 --- a/vendor/github.com/gorilla/feeds/README.md +++ /dev/null @@ -1,185 +0,0 @@ -## gorilla/feeds -[![GoDoc](https://godoc.org/github.com/gorilla/feeds?status.svg)](https://godoc.org/github.com/gorilla/feeds) -[![Build Status](https://travis-ci.org/gorilla/feeds.svg?branch=master)](https://travis-ci.org/gorilla/feeds) - -feeds is a web feed generator library for generating RSS, Atom and JSON feeds from Go -applications. - -### Goals - - * Provide a simple interface to create both Atom & RSS 2.0 feeds - * Full support for [Atom][atom], [RSS 2.0][rss], and [JSON Feed Version 1][jsonfeed] spec elements - * Ability to modify particulars for each spec - -[atom]: https://tools.ietf.org/html/rfc4287 -[rss]: http://www.rssboard.org/rss-specification -[jsonfeed]: https://jsonfeed.org/version/1 - -### Usage - -```go -package main - -import ( - "fmt" - "log" - "time" - "github.com/gorilla/feeds" -) - -func main() { - now := time.Now() - feed := &feeds.Feed{ - Title: "jmoiron.net blog", - Link: &feeds.Link{Href: "http://jmoiron.net/blog"}, - Description: "discussion about tech, footie, photos", - Author: &feeds.Author{Name: "Jason Moiron", Email: "jmoiron@jmoiron.net"}, - Created: now, - } - - feed.Items = []*feeds.Item{ - &feeds.Item{ - Title: "Limiting Concurrency in Go", - Link: &feeds.Link{Href: "http://jmoiron.net/blog/limiting-concurrency-in-go/"}, - Description: "A discussion on controlled parallelism in golang", - Author: &feeds.Author{Name: "Jason Moiron", Email: "jmoiron@jmoiron.net"}, - Created: now, - }, - &feeds.Item{ - Title: "Logic-less Template Redux", - Link: &feeds.Link{Href: "http://jmoiron.net/blog/logicless-template-redux/"}, - Description: "More thoughts on logicless templates", - Created: now, - }, - &feeds.Item{ - Title: "Idiomatic Code Reuse in Go", - Link: &feeds.Link{Href: "http://jmoiron.net/blog/idiomatic-code-reuse-in-go/"}, - Description: "How to use interfaces effectively", - Created: now, - }, - } - - atom, err := feed.ToAtom() - if err != nil { - log.Fatal(err) - } - - rss, err := feed.ToRss() - if err != nil { - log.Fatal(err) - } - - json, err := feed.ToJSON() - if err != nil { - log.Fatal(err) - } - - fmt.Println(atom, "\n", rss, "\n", json) -} -``` - -Outputs: - -```xml - - - jmoiron.net blog - - http://jmoiron.net/blog - 2013-01-16T03:26:01-05:00 - discussion about tech, footie, photos - - Limiting Concurrency in Go - - 2013-01-16T03:26:01-05:00 - tag:jmoiron.net,2013-01-16:/blog/limiting-concurrency-in-go/ - A discussion on controlled parallelism in golang - - Jason Moiron - jmoiron@jmoiron.net - - - - Logic-less Template Redux - - 2013-01-16T03:26:01-05:00 - tag:jmoiron.net,2013-01-16:/blog/logicless-template-redux/ - More thoughts on logicless templates - - - - Idiomatic Code Reuse in Go - - 2013-01-16T03:26:01-05:00 - tag:jmoiron.net,2013-01-16:/blog/idiomatic-code-reuse-in-go/ - How to use interfaces <em>effectively</em> - - - - - - - - jmoiron.net blog - http://jmoiron.net/blog - discussion about tech, footie, photos - jmoiron@jmoiron.net (Jason Moiron) - 2013-01-16T03:22:24-05:00 - - Limiting Concurrency in Go - http://jmoiron.net/blog/limiting-concurrency-in-go/ - A discussion on controlled parallelism in golang - 2013-01-16T03:22:24-05:00 - - - Logic-less Template Redux - http://jmoiron.net/blog/logicless-template-redux/ - More thoughts on logicless templates - 2013-01-16T03:22:24-05:00 - - - Idiomatic Code Reuse in Go - http://jmoiron.net/blog/idiomatic-code-reuse-in-go/ - How to use interfaces <em>effectively</em> - 2013-01-16T03:22:24-05:00 - - - - -{ - "version": "https://jsonfeed.org/version/1", - "title": "jmoiron.net blog", - "home_page_url": "http://jmoiron.net/blog", - "description": "discussion about tech, footie, photos", - "author": { - "name": "Jason Moiron" - }, - "items": [ - { - "id": "", - "url": "http://jmoiron.net/blog/limiting-concurrency-in-go/", - "title": "Limiting Concurrency in Go", - "summary": "A discussion on controlled parallelism in golang", - "date_published": "2013-01-16T03:22:24.530817846-05:00", - "author": { - "name": "Jason Moiron" - } - }, - { - "id": "", - "url": "http://jmoiron.net/blog/logicless-template-redux/", - "title": "Logic-less Template Redux", - "summary": "More thoughts on logicless templates", - "date_published": "2013-01-16T03:22:24.530817846-05:00" - }, - { - "id": "", - "url": "http://jmoiron.net/blog/idiomatic-code-reuse-in-go/", - "title": "Idiomatic Code Reuse in Go", - "summary": "How to use interfaces \u003cem\u003eeffectively\u003c/em\u003e", - "date_published": "2013-01-16T03:22:24.530817846-05:00" - } - ] -} -``` - diff --git a/vendor/github.com/gorilla/feeds/atom.go b/vendor/github.com/gorilla/feeds/atom.go deleted file mode 100644 index 7196f47..0000000 --- a/vendor/github.com/gorilla/feeds/atom.go +++ /dev/null @@ -1,169 +0,0 @@ -package feeds - -import ( - "encoding/xml" - "fmt" - "net/url" - "time" -) - -// Generates Atom feed as XML - -const ns = "http://www.w3.org/2005/Atom" - -type AtomPerson struct { - Name string `xml:"name,omitempty"` - Uri string `xml:"uri,omitempty"` - Email string `xml:"email,omitempty"` -} - -type AtomSummary struct { - XMLName xml.Name `xml:"summary"` - Content string `xml:",chardata"` - Type string `xml:"type,attr"` -} - -type AtomContent struct { - XMLName xml.Name `xml:"content"` - Content string `xml:",chardata"` - Type string `xml:"type,attr"` -} - -type AtomAuthor struct { - XMLName xml.Name `xml:"author"` - AtomPerson -} - -type AtomContributor struct { - XMLName xml.Name `xml:"contributor"` - AtomPerson -} - -type AtomEntry struct { - XMLName xml.Name `xml:"entry"` - Xmlns string `xml:"xmlns,attr,omitempty"` - Title string `xml:"title"` // required - Updated string `xml:"updated"` // required - Id string `xml:"id"` // required - Category string `xml:"category,omitempty"` - Content *AtomContent - Rights string `xml:"rights,omitempty"` - Source string `xml:"source,omitempty"` - Published string `xml:"published,omitempty"` - Contributor *AtomContributor - Links []AtomLink // required if no child 'content' elements - Summary *AtomSummary // required if content has src or content is base64 - Author *AtomAuthor // required if feed lacks an author -} - -// Multiple links with different rel can coexist -type AtomLink struct { - //Atom 1.0 - XMLName xml.Name `xml:"link"` - Href string `xml:"href,attr"` - Rel string `xml:"rel,attr,omitempty"` - Type string `xml:"type,attr,omitempty"` - Length string `xml:"length,attr,omitempty"` -} - -type AtomFeed struct { - XMLName xml.Name `xml:"feed"` - Xmlns string `xml:"xmlns,attr"` - Title string `xml:"title"` // required - Id string `xml:"id"` // required - Updated string `xml:"updated"` // required - Category string `xml:"category,omitempty"` - Icon string `xml:"icon,omitempty"` - Logo string `xml:"logo,omitempty"` - Rights string `xml:"rights,omitempty"` // copyright used - Subtitle string `xml:"subtitle,omitempty"` - Link *AtomLink - Author *AtomAuthor `xml:"author,omitempty"` - Contributor *AtomContributor - Entries []*AtomEntry `xml:"entry"` -} - -type Atom struct { - *Feed -} - -func newAtomEntry(i *Item) *AtomEntry { - id := i.Id - // assume the description is html - s := &AtomSummary{Content: i.Description, Type: "html"} - - if len(id) == 0 { - // if there's no id set, try to create one, either from data or just a uuid - if len(i.Link.Href) > 0 && (!i.Created.IsZero() || !i.Updated.IsZero()) { - dateStr := anyTimeFormat("2006-01-02", i.Updated, i.Created) - host, path := i.Link.Href, "/invalid.html" - if url, err := url.Parse(i.Link.Href); err == nil { - host, path = url.Host, url.Path - } - id = fmt.Sprintf("tag:%s,%s:%s", host, dateStr, path) - } else { - id = "urn:uuid:" + NewUUID().String() - } - } - var name, email string - if i.Author != nil { - name, email = i.Author.Name, i.Author.Email - } - - link_rel := i.Link.Rel - if link_rel == "" { - link_rel = "alternate" - } - x := &AtomEntry{ - Title: i.Title, - Links: []AtomLink{{Href: i.Link.Href, Rel: link_rel, Type: i.Link.Type}}, - Id: id, - Updated: anyTimeFormat(time.RFC3339, i.Updated, i.Created), - Summary: s, - } - - // if there's a content, assume it's html - if len(i.Content) > 0 { - x.Content = &AtomContent{Content: i.Content, Type: "html"} - } - - if i.Enclosure != nil && link_rel != "enclosure" { - x.Links = append(x.Links, AtomLink{Href: i.Enclosure.Url, Rel: "enclosure", Type: i.Enclosure.Type, Length: i.Enclosure.Length}) - } - - if len(name) > 0 || len(email) > 0 { - x.Author = &AtomAuthor{AtomPerson: AtomPerson{Name: name, Email: email}} - } - return x -} - -// create a new AtomFeed with a generic Feed struct's data -func (a *Atom) AtomFeed() *AtomFeed { - updated := anyTimeFormat(time.RFC3339, a.Updated, a.Created) - feed := &AtomFeed{ - Xmlns: ns, - Title: a.Title, - Link: &AtomLink{Href: a.Link.Href, Rel: a.Link.Rel}, - Subtitle: a.Description, - Id: a.Link.Href, - Updated: updated, - Rights: a.Copyright, - } - if a.Author != nil { - feed.Author = &AtomAuthor{AtomPerson: AtomPerson{Name: a.Author.Name, Email: a.Author.Email}} - } - for _, e := range a.Items { - feed.Entries = append(feed.Entries, newAtomEntry(e)) - } - return feed -} - -// FeedXml returns an XML-Ready object for an Atom object -func (a *Atom) FeedXml() interface{} { - return a.AtomFeed() -} - -// FeedXml returns an XML-ready object for an AtomFeed object -func (a *AtomFeed) FeedXml() interface{} { - return a -} diff --git a/vendor/github.com/gorilla/feeds/doc.go b/vendor/github.com/gorilla/feeds/doc.go deleted file mode 100644 index 4e0759c..0000000 --- a/vendor/github.com/gorilla/feeds/doc.go +++ /dev/null @@ -1,73 +0,0 @@ -/* -Syndication (feed) generator library for golang. - -Installing - - go get github.com/gorilla/feeds - -Feeds provides a simple, generic Feed interface with a generic Item object as well as RSS, Atom and JSON Feed specific RssFeed, AtomFeed and JSONFeed objects which allow access to all of each spec's defined elements. - -Examples - -Create a Feed and some Items in that feed using the generic interfaces: - - import ( - "time" - . "github.com/gorilla/feeds" - ) - - now = time.Now() - - feed := &Feed{ - Title: "jmoiron.net blog", - Link: &Link{Href: "http://jmoiron.net/blog"}, - Description: "discussion about tech, footie, photos", - Author: &Author{Name: "Jason Moiron", Email: "jmoiron@jmoiron.net"}, - Created: now, - Copyright: "This work is copyright © Benjamin Button", - } - - feed.Items = []*Item{ - &Item{ - Title: "Limiting Concurrency in Go", - Link: &Link{Href: "http://jmoiron.net/blog/limiting-concurrency-in-go/"}, - Description: "A discussion on controlled parallelism in golang", - Author: &Author{Name: "Jason Moiron", Email: "jmoiron@jmoiron.net"}, - Created: now, - }, - &Item{ - Title: "Logic-less Template Redux", - Link: &Link{Href: "http://jmoiron.net/blog/logicless-template-redux/"}, - Description: "More thoughts on logicless templates", - Created: now, - }, - &Item{ - Title: "Idiomatic Code Reuse in Go", - Link: &Link{Href: "http://jmoiron.net/blog/idiomatic-code-reuse-in-go/"}, - Description: "How to use interfaces effectively", - Created: now, - }, - } - -From here, you can output Atom, RSS, or JSON Feed versions of this feed easily - - atom, err := feed.ToAtom() - rss, err := feed.ToRss() - json, err := feed.ToJSON() - -You can also get access to the underlying objects that feeds uses to export its XML - - atomFeed := (&Atom{Feed: feed}).AtomFeed() - rssFeed := (&Rss{Feed: feed}).RssFeed() - jsonFeed := (&JSON{Feed: feed}).JSONFeed() - -From here, you can modify or add each syndication's specific fields before outputting - - atomFeed.Subtitle = "plays the blues" - atom, err := ToXML(atomFeed) - rssFeed.Generator = "gorilla/feeds v1.0 (github.com/gorilla/feeds)" - rss, err := ToXML(rssFeed) - jsonFeed.NextUrl = "https://www.example.com/feed.json?page=2" - json, err := jsonFeed.ToJSON() -*/ -package feeds diff --git a/vendor/github.com/gorilla/feeds/feed.go b/vendor/github.com/gorilla/feeds/feed.go deleted file mode 100644 index 790a1b6..0000000 --- a/vendor/github.com/gorilla/feeds/feed.go +++ /dev/null @@ -1,145 +0,0 @@ -package feeds - -import ( - "encoding/json" - "encoding/xml" - "io" - "sort" - "time" -) - -type Link struct { - Href, Rel, Type, Length string -} - -type Author struct { - Name, Email string -} - -type Image struct { - Url, Title, Link string - Width, Height int -} - -type Enclosure struct { - Url, Length, Type string -} - -type Item struct { - Title string - Link *Link - Source *Link - Author *Author - Description string // used as description in rss, summary in atom - Id string // used as guid in rss, id in atom - Updated time.Time - Created time.Time - Enclosure *Enclosure - Content string -} - -type Feed struct { - Title string - Link *Link - Description string - Author *Author - Updated time.Time - Created time.Time - Id string - Subtitle string - Items []*Item - Copyright string - Image *Image -} - -// add a new Item to a Feed -func (f *Feed) Add(item *Item) { - f.Items = append(f.Items, item) -} - -// returns the first non-zero time formatted as a string or "" -func anyTimeFormat(format string, times ...time.Time) string { - for _, t := range times { - if !t.IsZero() { - return t.Format(format) - } - } - return "" -} - -// interface used by ToXML to get a object suitable for exporting XML. -type XmlFeed interface { - FeedXml() interface{} -} - -// turn a feed object (either a Feed, AtomFeed, or RssFeed) into xml -// returns an error if xml marshaling fails -func ToXML(feed XmlFeed) (string, error) { - x := feed.FeedXml() - data, err := xml.MarshalIndent(x, "", " ") - if err != nil { - return "", err - } - // strip empty line from default xml header - s := xml.Header[:len(xml.Header)-1] + string(data) - return s, nil -} - -// WriteXML writes a feed object (either a Feed, AtomFeed, or RssFeed) as XML into -// the writer. Returns an error if XML marshaling fails. -func WriteXML(feed XmlFeed, w io.Writer) error { - x := feed.FeedXml() - // write default xml header, without the newline - if _, err := w.Write([]byte(xml.Header[:len(xml.Header)-1])); err != nil { - return err - } - e := xml.NewEncoder(w) - e.Indent("", " ") - return e.Encode(x) -} - -// creates an Atom representation of this feed -func (f *Feed) ToAtom() (string, error) { - a := &Atom{f} - return ToXML(a) -} - -// WriteAtom writes an Atom representation of this feed to the writer. -func (f *Feed) WriteAtom(w io.Writer) error { - return WriteXML(&Atom{f}, w) -} - -// creates an Rss representation of this feed -func (f *Feed) ToRss() (string, error) { - r := &Rss{f} - return ToXML(r) -} - -// WriteRss writes an RSS representation of this feed to the writer. -func (f *Feed) WriteRss(w io.Writer) error { - return WriteXML(&Rss{f}, w) -} - -// ToJSON creates a JSON Feed representation of this feed -func (f *Feed) ToJSON() (string, error) { - j := &JSON{f} - return j.ToJSON() -} - -// WriteJSON writes an JSON representation of this feed to the writer. -func (f *Feed) WriteJSON(w io.Writer) error { - j := &JSON{f} - feed := j.JSONFeed() - - e := json.NewEncoder(w) - e.SetIndent("", " ") - return e.Encode(feed) -} - -// Sort sorts the Items in the feed with the given less function. -func (f *Feed) Sort(less func(a, b *Item) bool) { - lessFunc := func(i, j int) bool { - return less(f.Items[i], f.Items[j]) - } - sort.SliceStable(f.Items, lessFunc) -} diff --git a/vendor/github.com/gorilla/feeds/json.go b/vendor/github.com/gorilla/feeds/json.go deleted file mode 100644 index 75a82fd..0000000 --- a/vendor/github.com/gorilla/feeds/json.go +++ /dev/null @@ -1,183 +0,0 @@ -package feeds - -import ( - "encoding/json" - "strings" - "time" -) - -const jsonFeedVersion = "https://jsonfeed.org/version/1" - -// JSONAuthor represents the author of the feed or of an individual item -// in the feed -type JSONAuthor struct { - Name string `json:"name,omitempty"` - Url string `json:"url,omitempty"` - Avatar string `json:"avatar,omitempty"` -} - -// JSONAttachment represents a related resource. Podcasts, for instance, would -// include an attachment that’s an audio or video file. -type JSONAttachment struct { - Url string `json:"url,omitempty"` - MIMEType string `json:"mime_type,omitempty"` - Title string `json:"title,omitempty"` - Size int32 `json:"size,omitempty"` - Duration time.Duration `json:"duration_in_seconds,omitempty"` -} - -// MarshalJSON implements the json.Marshaler interface. -// The Duration field is marshaled in seconds, all other fields are marshaled -// based upon the definitions in struct tags. -func (a *JSONAttachment) MarshalJSON() ([]byte, error) { - type EmbeddedJSONAttachment JSONAttachment - return json.Marshal(&struct { - Duration float64 `json:"duration_in_seconds,omitempty"` - *EmbeddedJSONAttachment - }{ - EmbeddedJSONAttachment: (*EmbeddedJSONAttachment)(a), - Duration: a.Duration.Seconds(), - }) -} - -// UnmarshalJSON implements the json.Unmarshaler interface. -// The Duration field is expected to be in seconds, all other field types -// match the struct definition. -func (a *JSONAttachment) UnmarshalJSON(data []byte) error { - type EmbeddedJSONAttachment JSONAttachment - var raw struct { - Duration float64 `json:"duration_in_seconds,omitempty"` - *EmbeddedJSONAttachment - } - raw.EmbeddedJSONAttachment = (*EmbeddedJSONAttachment)(a) - - err := json.Unmarshal(data, &raw) - if err != nil { - return err - } - - if raw.Duration > 0 { - nsec := int64(raw.Duration * float64(time.Second)) - raw.EmbeddedJSONAttachment.Duration = time.Duration(nsec) - } - - return nil -} - -// JSONItem represents a single entry/post for the feed. -type JSONItem struct { - Id string `json:"id"` - Url string `json:"url,omitempty"` - ExternalUrl string `json:"external_url,omitempty"` - Title string `json:"title,omitempty"` - ContentHTML string `json:"content_html,omitempty"` - ContentText string `json:"content_text,omitempty"` - Summary string `json:"summary,omitempty"` - Image string `json:"image,omitempty"` - BannerImage string `json:"banner_,omitempty"` - PublishedDate *time.Time `json:"date_published,omitempty"` - ModifiedDate *time.Time `json:"date_modified,omitempty"` - Author *JSONAuthor `json:"author,omitempty"` - Tags []string `json:"tags,omitempty"` - Attachments []JSONAttachment `json:"attachments,omitempty"` -} - -// JSONHub describes an endpoint that can be used to subscribe to real-time -// notifications from the publisher of this feed. -type JSONHub struct { - Type string `json:"type"` - Url string `json:"url"` -} - -// JSONFeed represents a syndication feed in the JSON Feed Version 1 format. -// Matching the specification found here: https://jsonfeed.org/version/1. -type JSONFeed struct { - Version string `json:"version"` - Title string `json:"title"` - HomePageUrl string `json:"home_page_url,omitempty"` - FeedUrl string `json:"feed_url,omitempty"` - Description string `json:"description,omitempty"` - UserComment string `json:"user_comment,omitempty"` - NextUrl string `json:"next_url,omitempty"` - Icon string `json:"icon,omitempty"` - Favicon string `json:"favicon,omitempty"` - Author *JSONAuthor `json:"author,omitempty"` - Expired *bool `json:"expired,omitempty"` - Hubs []*JSONItem `json:"hubs,omitempty"` - Items []*JSONItem `json:"items,omitempty"` -} - -// JSON is used to convert a generic Feed to a JSONFeed. -type JSON struct { - *Feed -} - -// ToJSON encodes f into a JSON string. Returns an error if marshalling fails. -func (f *JSON) ToJSON() (string, error) { - return f.JSONFeed().ToJSON() -} - -// ToJSON encodes f into a JSON string. Returns an error if marshalling fails. -func (f *JSONFeed) ToJSON() (string, error) { - data, err := json.MarshalIndent(f, "", " ") - if err != nil { - return "", err - } - - return string(data), nil -} - -// JSONFeed creates a new JSONFeed with a generic Feed struct's data. -func (f *JSON) JSONFeed() *JSONFeed { - feed := &JSONFeed{ - Version: jsonFeedVersion, - Title: f.Title, - Description: f.Description, - } - - if f.Link != nil { - feed.HomePageUrl = f.Link.Href - } - if f.Author != nil { - feed.Author = &JSONAuthor{ - Name: f.Author.Name, - } - } - for _, e := range f.Items { - feed.Items = append(feed.Items, newJSONItem(e)) - } - return feed -} - -func newJSONItem(i *Item) *JSONItem { - item := &JSONItem{ - Id: i.Id, - Title: i.Title, - Summary: i.Description, - - ContentHTML: i.Content, - } - - if i.Link != nil { - item.Url = i.Link.Href - } - if i.Source != nil { - item.ExternalUrl = i.Source.Href - } - if i.Author != nil { - item.Author = &JSONAuthor{ - Name: i.Author.Name, - } - } - if !i.Created.IsZero() { - item.PublishedDate = &i.Created - } - if !i.Updated.IsZero() { - item.ModifiedDate = &i.Updated - } - if i.Enclosure != nil && strings.HasPrefix(i.Enclosure.Type, "image/") { - item.Image = i.Enclosure.Url - } - - return item -} diff --git a/vendor/github.com/gorilla/feeds/rss.go b/vendor/github.com/gorilla/feeds/rss.go deleted file mode 100644 index 09179df..0000000 --- a/vendor/github.com/gorilla/feeds/rss.go +++ /dev/null @@ -1,168 +0,0 @@ -package feeds - -// rss support -// validation done according to spec here: -// http://cyber.law.harvard.edu/rss/rss.html - -import ( - "encoding/xml" - "fmt" - "time" -) - -// private wrapper around the RssFeed which gives us the .. xml -type RssFeedXml struct { - XMLName xml.Name `xml:"rss"` - Version string `xml:"version,attr"` - ContentNamespace string `xml:"xmlns:content,attr"` - Channel *RssFeed -} - -type RssContent struct { - XMLName xml.Name `xml:"content:encoded"` - Content string `xml:",cdata"` -} - -type RssImage struct { - XMLName xml.Name `xml:"image"` - Url string `xml:"url"` - Title string `xml:"title"` - Link string `xml:"link"` - Width int `xml:"width,omitempty"` - Height int `xml:"height,omitempty"` -} - -type RssTextInput struct { - XMLName xml.Name `xml:"textInput"` - Title string `xml:"title"` - Description string `xml:"description"` - Name string `xml:"name"` - Link string `xml:"link"` -} - -type RssFeed struct { - XMLName xml.Name `xml:"channel"` - Title string `xml:"title"` // required - Link string `xml:"link"` // required - Description string `xml:"description"` // required - Language string `xml:"language,omitempty"` - Copyright string `xml:"copyright,omitempty"` - ManagingEditor string `xml:"managingEditor,omitempty"` // Author used - WebMaster string `xml:"webMaster,omitempty"` - PubDate string `xml:"pubDate,omitempty"` // created or updated - LastBuildDate string `xml:"lastBuildDate,omitempty"` // updated used - Category string `xml:"category,omitempty"` - Generator string `xml:"generator,omitempty"` - Docs string `xml:"docs,omitempty"` - Cloud string `xml:"cloud,omitempty"` - Ttl int `xml:"ttl,omitempty"` - Rating string `xml:"rating,omitempty"` - SkipHours string `xml:"skipHours,omitempty"` - SkipDays string `xml:"skipDays,omitempty"` - Image *RssImage - TextInput *RssTextInput - Items []*RssItem `xml:"item"` -} - -type RssItem struct { - XMLName xml.Name `xml:"item"` - Title string `xml:"title"` // required - Link string `xml:"link"` // required - Description string `xml:"description"` // required - Content *RssContent - Author string `xml:"author,omitempty"` - Category string `xml:"category,omitempty"` - Comments string `xml:"comments,omitempty"` - Enclosure *RssEnclosure - Guid string `xml:"guid,omitempty"` // Id used - PubDate string `xml:"pubDate,omitempty"` // created or updated - Source string `xml:"source,omitempty"` -} - -type RssEnclosure struct { - //RSS 2.0 - XMLName xml.Name `xml:"enclosure"` - Url string `xml:"url,attr"` - Length string `xml:"length,attr"` - Type string `xml:"type,attr"` -} - -type Rss struct { - *Feed -} - -// create a new RssItem with a generic Item struct's data -func newRssItem(i *Item) *RssItem { - item := &RssItem{ - Title: i.Title, - Link: i.Link.Href, - Description: i.Description, - Guid: i.Id, - PubDate: anyTimeFormat(time.RFC1123Z, i.Created, i.Updated), - } - if len(i.Content) > 0 { - item.Content = &RssContent{Content: i.Content} - } - if i.Source != nil { - item.Source = i.Source.Href - } - - // Define a closure - if i.Enclosure != nil && i.Enclosure.Type != "" && i.Enclosure.Length != "" { - item.Enclosure = &RssEnclosure{Url: i.Enclosure.Url, Type: i.Enclosure.Type, Length: i.Enclosure.Length} - } - - if i.Author != nil { - item.Author = i.Author.Name - } - return item -} - -// create a new RssFeed with a generic Feed struct's data -func (r *Rss) RssFeed() *RssFeed { - pub := anyTimeFormat(time.RFC1123Z, r.Created, r.Updated) - build := anyTimeFormat(time.RFC1123Z, r.Updated) - author := "" - if r.Author != nil { - author = r.Author.Email - if len(r.Author.Name) > 0 { - author = fmt.Sprintf("%s (%s)", r.Author.Email, r.Author.Name) - } - } - - var image *RssImage - if r.Image != nil { - image = &RssImage{Url: r.Image.Url, Title: r.Image.Title, Link: r.Image.Link, Width: r.Image.Width, Height: r.Image.Height} - } - - channel := &RssFeed{ - Title: r.Title, - Link: r.Link.Href, - Description: r.Description, - ManagingEditor: author, - PubDate: pub, - LastBuildDate: build, - Copyright: r.Copyright, - Image: image, - } - for _, i := range r.Items { - channel.Items = append(channel.Items, newRssItem(i)) - } - return channel -} - -// FeedXml returns an XML-Ready object for an Rss object -func (r *Rss) FeedXml() interface{} { - // only generate version 2.0 feeds for now - return r.RssFeed().FeedXml() - -} - -// FeedXml returns an XML-ready object for an RssFeed object -func (r *RssFeed) FeedXml() interface{} { - return &RssFeedXml{ - Version: "2.0", - Channel: r, - ContentNamespace: "http://purl.org/rss/1.0/modules/content/", - } -} diff --git a/vendor/github.com/gorilla/feeds/test.atom b/vendor/github.com/gorilla/feeds/test.atom deleted file mode 100644 index aa15214..0000000 --- a/vendor/github.com/gorilla/feeds/test.atom +++ /dev/null @@ -1,92 +0,0 @@ - - - <![CDATA[Lorem ipsum feed for an interval of 1 minutes]]> - - http://example.com/ - RSS for Node - Tue, 30 Oct 2018 23:22:37 GMT - - Tue, 30 Oct 2018 23:22:00 GMT - - 60 - - <![CDATA[Lorem ipsum 2018-10-30T23:22:00+00:00]]> - - http://example.com/test/1540941720 - http://example.com/test/1540941720 - - Tue, 30 Oct 2018 23:22:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:21:00+00:00]]> - - http://example.com/test/1540941660 - http://example.com/test/1540941660 - - Tue, 30 Oct 2018 23:21:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:20:00+00:00]]> - - http://example.com/test/1540941600 - http://example.com/test/1540941600 - - Tue, 30 Oct 2018 23:20:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:19:00+00:00]]> - - http://example.com/test/1540941540 - http://example.com/test/1540941540 - - Tue, 30 Oct 2018 23:19:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:18:00+00:00]]> - - http://example.com/test/1540941480 - http://example.com/test/1540941480 - - Tue, 30 Oct 2018 23:18:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:17:00+00:00]]> - - http://example.com/test/1540941420 - http://example.com/test/1540941420 - - Tue, 30 Oct 2018 23:17:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:16:00+00:00]]> - - http://example.com/test/1540941360 - http://example.com/test/1540941360 - - Tue, 30 Oct 2018 23:16:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:15:00+00:00]]> - - http://example.com/test/1540941300 - http://example.com/test/1540941300 - - Tue, 30 Oct 2018 23:15:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:14:00+00:00]]> - - http://example.com/test/1540941240 - http://example.com/test/1540941240 - - Tue, 30 Oct 2018 23:14:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:13:00+00:00]]> - - http://example.com/test/1540941180 - http://example.com/test/1540941180 - - Tue, 30 Oct 2018 23:13:00 GMT - - \ No newline at end of file diff --git a/vendor/github.com/gorilla/feeds/test.rss b/vendor/github.com/gorilla/feeds/test.rss deleted file mode 100644 index 8d912ab..0000000 --- a/vendor/github.com/gorilla/feeds/test.rss +++ /dev/null @@ -1,96 +0,0 @@ - - - - <![CDATA[Lorem ipsum feed for an interval of 1 minutes]]> - - http://example.com/ - RSS for Node - Tue, 30 Oct 2018 23:22:37 GMT - - Tue, 30 Oct 2018 23:22:00 GMT - - 60 - - <![CDATA[Lorem ipsum 2018-10-30T23:22:00+00:00]]> - - http://example.com/test/1540941720 - http://example.com/test/1540941720 - - Tue, 30 Oct 2018 23:22:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:21:00+00:00]]> - - http://example.com/test/1540941660 - http://example.com/test/1540941660 - - Tue, 30 Oct 2018 23:21:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:20:00+00:00]]> - - http://example.com/test/1540941600 - http://example.com/test/1540941600 - - Tue, 30 Oct 2018 23:20:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:19:00+00:00]]> - - http://example.com/test/1540941540 - http://example.com/test/1540941540 - - Tue, 30 Oct 2018 23:19:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:18:00+00:00]]> - - http://example.com/test/1540941480 - http://example.com/test/1540941480 - - Tue, 30 Oct 2018 23:18:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:17:00+00:00]]> - - http://example.com/test/1540941420 - http://example.com/test/1540941420 - - Tue, 30 Oct 2018 23:17:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:16:00+00:00]]> - - http://example.com/test/1540941360 - http://example.com/test/1540941360 - - Tue, 30 Oct 2018 23:16:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:15:00+00:00]]> - - http://example.com/test/1540941300 - http://example.com/test/1540941300 - - Tue, 30 Oct 2018 23:15:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:14:00+00:00]]> - - http://example.com/test/1540941240 - http://example.com/test/1540941240 - - Tue, 30 Oct 2018 23:14:00 GMT - - - <![CDATA[Lorem ipsum 2018-10-30T23:13:00+00:00]]> - - http://example.com/test/1540941180 - http://example.com/test/1540941180 - - Tue, 30 Oct 2018 23:13:00 GMT - - - \ No newline at end of file diff --git a/vendor/github.com/gorilla/feeds/to-implement.md b/vendor/github.com/gorilla/feeds/to-implement.md deleted file mode 100644 index 45fd1e7..0000000 --- a/vendor/github.com/gorilla/feeds/to-implement.md +++ /dev/null @@ -1,20 +0,0 @@ -[Full iTunes list](https://help.apple.com/itc/podcasts_connect/#/itcb54353390) - -[Example of ideal iTunes RSS feed](https://help.apple.com/itc/podcasts_connect/#/itcbaf351599) - -``` - - - - - - - - - - - - - - -``` \ No newline at end of file diff --git a/vendor/github.com/gorilla/feeds/uuid.go b/vendor/github.com/gorilla/feeds/uuid.go deleted file mode 100644 index 51bbafe..0000000 --- a/vendor/github.com/gorilla/feeds/uuid.go +++ /dev/null @@ -1,27 +0,0 @@ -package feeds - -// relevant bits from https://github.com/abneptis/GoUUID/blob/master/uuid.go - -import ( - "crypto/rand" - "fmt" -) - -type UUID [16]byte - -// create a new uuid v4 -func NewUUID() *UUID { - u := &UUID{} - _, err := rand.Read(u[:16]) - if err != nil { - panic(err) - } - - u[8] = (u[8] | 0x80) & 0xBf - u[6] = (u[6] | 0x40) & 0x4f - return u -} - -func (u *UUID) String() string { - return fmt.Sprintf("%x-%x-%x-%x-%x", u[:4], u[4:6], u[6:8], u[8:10], u[10:]) -} diff --git a/vendor/github.com/kennygrant/sanitize/.gitignore b/vendor/github.com/kennygrant/sanitize/.gitignore deleted file mode 100644 index 0026861..0000000 --- a/vendor/github.com/kennygrant/sanitize/.gitignore +++ /dev/null @@ -1,22 +0,0 @@ -# Compiled Object files, Static and Dynamic libs (Shared Objects) -*.o -*.a -*.so - -# Folders -_obj -_test - -# Architecture specific extensions/prefixes -*.[568vq] -[568vq].out - -*.cgo1.go -*.cgo2.c -_cgo_defun.c -_cgo_gotypes.go -_cgo_export.* - -_testmain.go - -*.exe diff --git a/vendor/github.com/kennygrant/sanitize/.travis.yml b/vendor/github.com/kennygrant/sanitize/.travis.yml deleted file mode 100644 index 4f2ee4d..0000000 --- a/vendor/github.com/kennygrant/sanitize/.travis.yml +++ /dev/null @@ -1 +0,0 @@ -language: go diff --git a/vendor/github.com/kennygrant/sanitize/BUILD.bazel b/vendor/github.com/kennygrant/sanitize/BUILD.bazel deleted file mode 100644 index 6ba8b9b..0000000 --- a/vendor/github.com/kennygrant/sanitize/BUILD.bazel +++ /dev/null @@ -1,10 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "sanitize", - srcs = ["sanitize.go"], - importmap = "peridot.resf.org/vendor/github.com/kennygrant/sanitize", - importpath = "github.com/kennygrant/sanitize", - visibility = ["//visibility:public"], - deps = ["@org_golang_x_net//html"], -) diff --git a/vendor/github.com/kennygrant/sanitize/LICENSE b/vendor/github.com/kennygrant/sanitize/LICENSE deleted file mode 100644 index 749ebb2..0000000 --- a/vendor/github.com/kennygrant/sanitize/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2017 Mechanism Design. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/vendor/github.com/kennygrant/sanitize/README.md b/vendor/github.com/kennygrant/sanitize/README.md deleted file mode 100644 index 4401ef7..0000000 --- a/vendor/github.com/kennygrant/sanitize/README.md +++ /dev/null @@ -1,62 +0,0 @@ -sanitize [![GoDoc](https://godoc.org/github.com/kennygrant/sanitize?status.svg)](https://godoc.org/github.com/kennygrant/sanitize) [![Go Report Card](https://goreportcard.com/badge/github.com/kennygrant/sanitize)](https://goreportcard.com/report/github.com/kennygrant/sanitize) [![CircleCI](https://circleci.com/gh/kennygrant/sanitize.svg?style=svg)](https://circleci.com/gh/kennygrant/sanitize) -======== - -Package sanitize provides functions to sanitize html and paths with go (golang). - -FUNCTIONS - - -```go -sanitize.Accents(s string) string -``` - -Accents replaces a set of accented characters with ascii equivalents. - -```go -sanitize.BaseName(s string) string -``` - -BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. Unlike Name no attempt is made to normalise text as a path. - -```go -sanitize.HTML(s string) string -``` - -HTML strips html tags with a very simple parser, replace common entities, and escape < and > in the result. The result is intended to be used as plain text. - -```go -sanitize.HTMLAllowing(s string, args...[]string) (string, error) -``` - -HTMLAllowing parses html and allow certain tags and attributes from the lists optionally specified by args - args[0] is a list of allowed tags, args[1] is a list of allowed attributes. If either is missing default sets are used. - -```go -sanitize.Name(s string) string -``` - -Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters. - -```go -sanitize.Path(s string) string -``` - -Path makes a string safe to use as an url path. - - -Changes -------- - -Version 1.2 - -Adjusted HTML function to avoid linter warning -Added more tests from https://githubengineering.com/githubs-post-csp-journey/ -Chnaged name of license file -Added badges and change log to readme - -Version 1.1 -Fixed type in comments. -Merge pull request from Povilas Balzaravicius Pawka - - replace br tags with newline even when they contain a space - -Version 1.0 -First release \ No newline at end of file diff --git a/vendor/github.com/kennygrant/sanitize/sanitize.go b/vendor/github.com/kennygrant/sanitize/sanitize.go deleted file mode 100644 index 2932209..0000000 --- a/vendor/github.com/kennygrant/sanitize/sanitize.go +++ /dev/null @@ -1,388 +0,0 @@ -// Package sanitize provides functions for sanitizing text. -package sanitize - -import ( - "bytes" - "html" - "html/template" - "io" - "path" - "regexp" - "strings" - - parser "golang.org/x/net/html" -) - -var ( - ignoreTags = []string{"title", "script", "style", "iframe", "frame", "frameset", "noframes", "noembed", "embed", "applet", "object", "base"} - - defaultTags = []string{"h1", "h2", "h3", "h4", "h5", "h6", "div", "span", "hr", "p", "br", "b", "i", "strong", "em", "ol", "ul", "li", "a", "img", "pre", "code", "blockquote", "article", "section"} - - defaultAttributes = []string{"id", "class", "src", "href", "title", "alt", "name", "rel"} -) - -// HTMLAllowing sanitizes html, allowing some tags. -// Arrays of allowed tags and allowed attributes may optionally be passed as the second and third arguments. -func HTMLAllowing(s string, args ...[]string) (string, error) { - - allowedTags := defaultTags - if len(args) > 0 { - allowedTags = args[0] - } - allowedAttributes := defaultAttributes - if len(args) > 1 { - allowedAttributes = args[1] - } - - // Parse the html - tokenizer := parser.NewTokenizer(strings.NewReader(s)) - - buffer := bytes.NewBufferString("") - ignore := "" - - for { - tokenType := tokenizer.Next() - token := tokenizer.Token() - - switch tokenType { - - case parser.ErrorToken: - err := tokenizer.Err() - if err == io.EOF { - return buffer.String(), nil - } - return "", err - - case parser.StartTagToken: - - if len(ignore) == 0 && includes(allowedTags, token.Data) { - token.Attr = cleanAttributes(token.Attr, allowedAttributes) - buffer.WriteString(token.String()) - } else if includes(ignoreTags, token.Data) { - ignore = token.Data - } - - case parser.SelfClosingTagToken: - - if len(ignore) == 0 && includes(allowedTags, token.Data) { - token.Attr = cleanAttributes(token.Attr, allowedAttributes) - buffer.WriteString(token.String()) - } else if token.Data == ignore { - ignore = "" - } - - case parser.EndTagToken: - if len(ignore) == 0 && includes(allowedTags, token.Data) { - token.Attr = []parser.Attribute{} - buffer.WriteString(token.String()) - } else if token.Data == ignore { - ignore = "" - } - - case parser.TextToken: - // We allow text content through, unless ignoring this entire tag and its contents (including other tags) - if ignore == "" { - buffer.WriteString(token.String()) - } - case parser.CommentToken: - // We ignore comments by default - case parser.DoctypeToken: - // We ignore doctypes by default - html5 does not require them and this is intended for sanitizing snippets of text - default: - // We ignore unknown token types by default - - } - - } - -} - -// HTML strips html tags, replace common entities, and escapes <>&;'" in the result. -// Note the returned text may contain entities as it is escaped by HTMLEscapeString, and most entities are not translated. -func HTML(s string) (output string) { - - // Shortcut strings with no tags in them - if !strings.ContainsAny(s, "<>") { - output = s - } else { - - // First remove line breaks etc as these have no meaning outside html tags (except pre) - // this means pre sections will lose formatting... but will result in less unintentional paras. - s = strings.Replace(s, "\n", "", -1) - - // Then replace line breaks with newlines, to preserve that formatting - s = strings.Replace(s, "

", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - s = strings.Replace(s, "
", "\n", -1) - - // Walk through the string removing all tags - b := bytes.NewBufferString("") - inTag := false - for _, r := range s { - switch r { - case '<': - inTag = true - case '>': - inTag = false - default: - if !inTag { - b.WriteRune(r) - } - } - } - output = b.String() - } - - // Remove a few common harmless entities, to arrive at something more like plain text - output = strings.Replace(output, "‘", "'", -1) - output = strings.Replace(output, "’", "'", -1) - output = strings.Replace(output, "“", "\"", -1) - output = strings.Replace(output, "”", "\"", -1) - output = strings.Replace(output, " ", " ", -1) - output = strings.Replace(output, """, "\"", -1) - output = strings.Replace(output, "'", "'", -1) - - // Translate some entities into their plain text equivalent (for example accents, if encoded as entities) - output = html.UnescapeString(output) - - // In case we have missed any tags above, escape the text - removes <, >, &, ' and ". - output = template.HTMLEscapeString(output) - - // After processing, remove some harmless entities &, ' and " which are encoded by HTMLEscapeString - output = strings.Replace(output, """, "\"", -1) - output = strings.Replace(output, "'", "'", -1) - output = strings.Replace(output, "& ", "& ", -1) // NB space after - output = strings.Replace(output, "&amp; ", "& ", -1) // NB space after - - return output -} - -// We are very restrictive as this is intended for ascii url slugs -var illegalPath = regexp.MustCompile(`[^[:alnum:]\~\-\./]`) - -// Path makes a string safe to use as a URL path, -// removing accents and replacing separators with -. -// The path may still start at / and is not intended -// for use as a file system path without prefix. -func Path(s string) string { - // Start with lowercase string - filePath := strings.ToLower(s) - filePath = strings.Replace(filePath, "..", "", -1) - filePath = path.Clean(filePath) - - // Remove illegal characters for paths, flattening accents - // and replacing some common separators with - - filePath = cleanString(filePath, illegalPath) - - // NB this may be of length 0, caller must check - return filePath -} - -// Remove all other unrecognised characters apart from -var illegalName = regexp.MustCompile(`[^[:alnum:]-.]`) - -// Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters. -func Name(s string) string { - // Start with lowercase string - fileName := strings.ToLower(s) - fileName = path.Clean(path.Base(fileName)) - - // Remove illegal characters for names, replacing some common separators with - - fileName = cleanString(fileName, illegalName) - - // NB this may be of length 0, caller must check - return fileName -} - -// Replace these separators with - -var baseNameSeparators = regexp.MustCompile(`[./]`) - -// BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. -// No attempt is made to normalise a path or normalise case. -func BaseName(s string) string { - - // Replace certain joining characters with a dash - baseName := baseNameSeparators.ReplaceAllString(s, "-") - - // Remove illegal characters for names, replacing some common separators with - - baseName = cleanString(baseName, illegalName) - - // NB this may be of length 0, caller must check - return baseName -} - -// A very limited list of transliterations to catch common european names translated to urls. -// This set could be expanded with at least caps and many more characters. -var transliterations = map[rune]string{ - 'À': "A", - 'Á': "A", - 'Â': "A", - 'Ã': "A", - 'Ä': "A", - 'Å': "AA", - 'Æ': "AE", - 'Ç': "C", - 'È': "E", - 'É': "E", - 'Ê': "E", - 'Ë': "E", - 'Ì': "I", - 'Í': "I", - 'Î': "I", - 'Ï': "I", - 'Ð': "D", - 'Ł': "L", - 'Ñ': "N", - 'Ò': "O", - 'Ó': "O", - 'Ô': "O", - 'Õ': "O", - 'Ö': "OE", - 'Ø': "OE", - 'Œ': "OE", - 'Ù': "U", - 'Ú': "U", - 'Ü': "UE", - 'Û': "U", - 'Ý': "Y", - 'Þ': "TH", - 'ẞ': "SS", - 'à': "a", - 'á': "a", - 'â': "a", - 'ã': "a", - 'ä': "ae", - 'å': "aa", - 'æ': "ae", - 'ç': "c", - 'è': "e", - 'é': "e", - 'ê': "e", - 'ë': "e", - 'ì': "i", - 'í': "i", - 'î': "i", - 'ï': "i", - 'ð': "d", - 'ł': "l", - 'ñ': "n", - 'ń': "n", - 'ò': "o", - 'ó': "o", - 'ô': "o", - 'õ': "o", - 'ō': "o", - 'ö': "oe", - 'ø': "oe", - 'œ': "oe", - 'ś': "s", - 'ù': "u", - 'ú': "u", - 'û': "u", - 'ū': "u", - 'ü': "ue", - 'ý': "y", - 'ÿ': "y", - 'ż': "z", - 'þ': "th", - 'ß': "ss", -} - -// Accents replaces a set of accented characters with ascii equivalents. -func Accents(s string) string { - // Replace some common accent characters - b := bytes.NewBufferString("") - for _, c := range s { - // Check transliterations first - if val, ok := transliterations[c]; ok { - b.WriteString(val) - } else { - b.WriteRune(c) - } - } - return b.String() -} - -var ( - // If the attribute contains data: or javascript: anywhere, ignore it - // we don't allow this in attributes as it is so frequently used for xss - // NB we allow spaces in the value, and lowercase. - illegalAttr = regexp.MustCompile(`(d\s*a\s*t\s*a|j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)\s*:`) - - // We are far more restrictive with href attributes. - legalHrefAttr = regexp.MustCompile(`\A[/#][^/\\]?|mailto:|http://|https://`) -) - -// cleanAttributes returns an array of attributes after removing malicious ones. -func cleanAttributes(a []parser.Attribute, allowed []string) []parser.Attribute { - if len(a) == 0 { - return a - } - - var cleaned []parser.Attribute - for _, attr := range a { - if includes(allowed, attr.Key) { - - val := strings.ToLower(attr.Val) - - // Check for illegal attribute values - if illegalAttr.FindString(val) != "" { - attr.Val = "" - } - - // Check for legal href values - / mailto:// http:// or https:// - if attr.Key == "href" { - if legalHrefAttr.FindString(val) == "" { - attr.Val = "" - } - } - - // If we still have an attribute, append it to the array - if attr.Val != "" { - cleaned = append(cleaned, attr) - } - } - } - return cleaned -} - -// A list of characters we consider separators in normal strings and replace with our canonical separator - rather than removing. -var ( - separators = regexp.MustCompile(`[ &_=+:]`) - - dashes = regexp.MustCompile(`[\-]+`) -) - -// cleanString replaces separators with - and removes characters listed in the regexp provided from string. -// Accents, spaces, and all characters not in A-Za-z0-9 are replaced. -func cleanString(s string, r *regexp.Regexp) string { - - // Remove any trailing space to avoid ending on - - s = strings.Trim(s, " ") - - // Flatten accents first so that if we remove non-ascii we still get a legible name - s = Accents(s) - - // Replace certain joining characters with a dash - s = separators.ReplaceAllString(s, "-") - - // Remove all other unrecognised characters - NB we do allow any printable characters - s = r.ReplaceAllString(s, "") - - // Remove any multiple dashes caused by replacements above - s = dashes.ReplaceAllString(s, "-") - - return s -} - -// includes checks for inclusion of a string in a []string. -func includes(a []string, s string) bool { - for _, as := range a { - if as == s { - return true - } - } - return false -} diff --git a/vendor/github.com/saintfish/chardet/2022.go b/vendor/github.com/saintfish/chardet/2022.go deleted file mode 100644 index e667225..0000000 --- a/vendor/github.com/saintfish/chardet/2022.go +++ /dev/null @@ -1,102 +0,0 @@ -package chardet - -import ( - "bytes" -) - -type recognizer2022 struct { - charset string - escapes [][]byte -} - -func (r *recognizer2022) Match(input *recognizerInput) (output recognizerOutput) { - return recognizerOutput{ - Charset: r.charset, - Confidence: r.matchConfidence(input.input), - } -} - -func (r *recognizer2022) matchConfidence(input []byte) int { - var hits, misses, shifts int -input: - for i := 0; i < len(input); i++ { - c := input[i] - if c == 0x1B { - for _, esc := range r.escapes { - if bytes.HasPrefix(input[i+1:], esc) { - hits++ - i += len(esc) - continue input - } - } - misses++ - } else if c == 0x0E || c == 0x0F { - shifts++ - } - } - if hits == 0 { - return 0 - } - quality := (100*hits - 100*misses) / (hits + misses) - if hits+shifts < 5 { - quality -= (5 - (hits + shifts)) * 10 - } - if quality < 0 { - quality = 0 - } - return quality -} - -var escapeSequences_2022JP = [][]byte{ - {0x24, 0x28, 0x43}, // KS X 1001:1992 - {0x24, 0x28, 0x44}, // JIS X 212-1990 - {0x24, 0x40}, // JIS C 6226-1978 - {0x24, 0x41}, // GB 2312-80 - {0x24, 0x42}, // JIS X 208-1983 - {0x26, 0x40}, // JIS X 208 1990, 1997 - {0x28, 0x42}, // ASCII - {0x28, 0x48}, // JIS-Roman - {0x28, 0x49}, // Half-width katakana - {0x28, 0x4a}, // JIS-Roman - {0x2e, 0x41}, // ISO 8859-1 - {0x2e, 0x46}, // ISO 8859-7 -} - -var escapeSequences_2022KR = [][]byte{ - {0x24, 0x29, 0x43}, -} - -var escapeSequences_2022CN = [][]byte{ - {0x24, 0x29, 0x41}, // GB 2312-80 - {0x24, 0x29, 0x47}, // CNS 11643-1992 Plane 1 - {0x24, 0x2A, 0x48}, // CNS 11643-1992 Plane 2 - {0x24, 0x29, 0x45}, // ISO-IR-165 - {0x24, 0x2B, 0x49}, // CNS 11643-1992 Plane 3 - {0x24, 0x2B, 0x4A}, // CNS 11643-1992 Plane 4 - {0x24, 0x2B, 0x4B}, // CNS 11643-1992 Plane 5 - {0x24, 0x2B, 0x4C}, // CNS 11643-1992 Plane 6 - {0x24, 0x2B, 0x4D}, // CNS 11643-1992 Plane 7 - {0x4e}, // SS2 - {0x4f}, // SS3 -} - -func newRecognizer_2022JP() *recognizer2022 { - return &recognizer2022{ - "ISO-2022-JP", - escapeSequences_2022JP, - } -} - -func newRecognizer_2022KR() *recognizer2022 { - return &recognizer2022{ - "ISO-2022-KR", - escapeSequences_2022KR, - } -} - -func newRecognizer_2022CN() *recognizer2022 { - return &recognizer2022{ - "ISO-2022-CN", - escapeSequences_2022CN, - } -} diff --git a/vendor/github.com/saintfish/chardet/AUTHORS b/vendor/github.com/saintfish/chardet/AUTHORS deleted file mode 100644 index 842d021..0000000 --- a/vendor/github.com/saintfish/chardet/AUTHORS +++ /dev/null @@ -1 +0,0 @@ -Sheng Yu (yusheng dot sjtu at gmail dot com) diff --git a/vendor/github.com/saintfish/chardet/BUILD.bazel b/vendor/github.com/saintfish/chardet/BUILD.bazel deleted file mode 100644 index 43aa411..0000000 --- a/vendor/github.com/saintfish/chardet/BUILD.bazel +++ /dev/null @@ -1,17 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "chardet", - srcs = [ - "2022.go", - "detector.go", - "multi_byte.go", - "recognizer.go", - "single_byte.go", - "unicode.go", - "utf8.go", - ], - importmap = "peridot.resf.org/vendor/github.com/saintfish/chardet", - importpath = "github.com/saintfish/chardet", - visibility = ["//visibility:public"], -) diff --git a/vendor/github.com/saintfish/chardet/LICENSE b/vendor/github.com/saintfish/chardet/LICENSE deleted file mode 100644 index 35ee796..0000000 --- a/vendor/github.com/saintfish/chardet/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2012 chardet Authors - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -Partial of the Software is derived from ICU project. See icu-license.html for -license of the derivative portions. diff --git a/vendor/github.com/saintfish/chardet/README.md b/vendor/github.com/saintfish/chardet/README.md deleted file mode 100644 index 4281eec..0000000 --- a/vendor/github.com/saintfish/chardet/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# chardet - -chardet is library to automatically detect -[charset](http://en.wikipedia.org/wiki/Character_encoding) of texts for [Go -programming language](http://golang.org/). It's based on the algorithm and data -in [ICU](http://icu-project.org/)'s implementation. - -## Documentation and Usage - -See [pkgdoc](http://go.pkgdoc.org/github.com/saintfish/chardet) diff --git a/vendor/github.com/saintfish/chardet/detector.go b/vendor/github.com/saintfish/chardet/detector.go deleted file mode 100644 index e11c222..0000000 --- a/vendor/github.com/saintfish/chardet/detector.go +++ /dev/null @@ -1,136 +0,0 @@ -// Package chardet ports character set detection from ICU. -package chardet - -import ( - "errors" - "sort" -) - -// Result contains all the information that charset detector gives. -type Result struct { - // IANA name of the detected charset. - Charset string - // IANA name of the detected language. It may be empty for some charsets. - Language string - // Confidence of the Result. Scale from 1 to 100. The bigger, the more confident. - Confidence int -} - -// Detector implements charset detection. -type Detector struct { - recognizers []recognizer - stripTag bool -} - -// List of charset recognizers -var recognizers = []recognizer{ - newRecognizer_utf8(), - newRecognizer_utf16be(), - newRecognizer_utf16le(), - newRecognizer_utf32be(), - newRecognizer_utf32le(), - newRecognizer_8859_1_en(), - newRecognizer_8859_1_da(), - newRecognizer_8859_1_de(), - newRecognizer_8859_1_es(), - newRecognizer_8859_1_fr(), - newRecognizer_8859_1_it(), - newRecognizer_8859_1_nl(), - newRecognizer_8859_1_no(), - newRecognizer_8859_1_pt(), - newRecognizer_8859_1_sv(), - newRecognizer_8859_2_cs(), - newRecognizer_8859_2_hu(), - newRecognizer_8859_2_pl(), - newRecognizer_8859_2_ro(), - newRecognizer_8859_5_ru(), - newRecognizer_8859_6_ar(), - newRecognizer_8859_7_el(), - newRecognizer_8859_8_I_he(), - newRecognizer_8859_8_he(), - newRecognizer_windows_1251(), - newRecognizer_windows_1256(), - newRecognizer_KOI8_R(), - newRecognizer_8859_9_tr(), - - newRecognizer_sjis(), - newRecognizer_gb_18030(), - newRecognizer_euc_jp(), - newRecognizer_euc_kr(), - newRecognizer_big5(), - - newRecognizer_2022JP(), - newRecognizer_2022KR(), - newRecognizer_2022CN(), - - newRecognizer_IBM424_he_rtl(), - newRecognizer_IBM424_he_ltr(), - newRecognizer_IBM420_ar_rtl(), - newRecognizer_IBM420_ar_ltr(), -} - -// NewTextDetector creates a Detector for plain text. -func NewTextDetector() *Detector { - return &Detector{recognizers, false} -} - -// NewHtmlDetector creates a Detector for Html. -func NewHtmlDetector() *Detector { - return &Detector{recognizers, true} -} - -var ( - NotDetectedError = errors.New("Charset not detected.") -) - -// DetectBest returns the Result with highest Confidence. -func (d *Detector) DetectBest(b []byte) (r *Result, err error) { - var all []Result - if all, err = d.DetectAll(b); err == nil { - r = &all[0] - } - return -} - -// DetectAll returns all Results which have non-zero Confidence. The Results are sorted by Confidence in descending order. -func (d *Detector) DetectAll(b []byte) ([]Result, error) { - input := newRecognizerInput(b, d.stripTag) - outputChan := make(chan recognizerOutput) - for _, r := range d.recognizers { - go matchHelper(r, input, outputChan) - } - outputs := make([]recognizerOutput, 0, len(d.recognizers)) - for i := 0; i < len(d.recognizers); i++ { - o := <-outputChan - if o.Confidence > 0 { - outputs = append(outputs, o) - } - } - if len(outputs) == 0 { - return nil, NotDetectedError - } - - sort.Sort(recognizerOutputs(outputs)) - dedupOutputs := make([]Result, 0, len(outputs)) - foundCharsets := make(map[string]struct{}, len(outputs)) - for _, o := range outputs { - if _, found := foundCharsets[o.Charset]; !found { - dedupOutputs = append(dedupOutputs, Result(o)) - foundCharsets[o.Charset] = struct{}{} - } - } - if len(dedupOutputs) == 0 { - return nil, NotDetectedError - } - return dedupOutputs, nil -} - -func matchHelper(r recognizer, input *recognizerInput, outputChan chan<- recognizerOutput) { - outputChan <- r.Match(input) -} - -type recognizerOutputs []recognizerOutput - -func (r recognizerOutputs) Len() int { return len(r) } -func (r recognizerOutputs) Less(i, j int) bool { return r[i].Confidence > r[j].Confidence } -func (r recognizerOutputs) Swap(i, j int) { r[i], r[j] = r[j], r[i] } diff --git a/vendor/github.com/saintfish/chardet/icu-license.html b/vendor/github.com/saintfish/chardet/icu-license.html deleted file mode 100644 index d078d05..0000000 --- a/vendor/github.com/saintfish/chardet/icu-license.html +++ /dev/null @@ -1,51 +0,0 @@ - - - - -ICU License - ICU 1.8.1 and later - - - -

ICU License - ICU 1.8.1 and later

- -

COPYRIGHT AND PERMISSION NOTICE

- -

-Copyright (c) 1995-2012 International Business Machines Corporation and others -

-

-All rights reserved. -

-

-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, and/or sell -copies of the Software, and to permit persons -to whom the Software is furnished to do so, provided that the above -copyright notice(s) and this permission notice appear in all copies -of the Software and that both the above copyright notice(s) and this -permission notice appear in supporting documentation. -

-

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, -OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER -RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -USE OR PERFORMANCE OF THIS SOFTWARE. -

-

-Except as contained in this notice, the name of a copyright holder shall not be -used in advertising or otherwise to promote the sale, use or other dealings in -this Software without prior written authorization of the copyright holder. -

- -
-

-All trademarks and registered trademarks mentioned herein are the property of their respective owners. -

- - diff --git a/vendor/github.com/saintfish/chardet/multi_byte.go b/vendor/github.com/saintfish/chardet/multi_byte.go deleted file mode 100644 index 1fab34c..0000000 --- a/vendor/github.com/saintfish/chardet/multi_byte.go +++ /dev/null @@ -1,345 +0,0 @@ -package chardet - -import ( - "errors" - "math" -) - -type recognizerMultiByte struct { - charset string - language string - decoder charDecoder - commonChars []uint16 -} - -type charDecoder interface { - DecodeOneChar([]byte) (c uint16, remain []byte, err error) -} - -func (r *recognizerMultiByte) Match(input *recognizerInput) (output recognizerOutput) { - return recognizerOutput{ - Charset: r.charset, - Language: r.language, - Confidence: r.matchConfidence(input), - } -} - -func (r *recognizerMultiByte) matchConfidence(input *recognizerInput) int { - raw := input.raw - var c uint16 - var err error - var totalCharCount, badCharCount, singleByteCharCount, doubleByteCharCount, commonCharCount int - for c, raw, err = r.decoder.DecodeOneChar(raw); len(raw) > 0; c, raw, err = r.decoder.DecodeOneChar(raw) { - totalCharCount++ - if err != nil { - badCharCount++ - } else if c <= 0xFF { - singleByteCharCount++ - } else { - doubleByteCharCount++ - if r.commonChars != nil && binarySearch(r.commonChars, c) { - commonCharCount++ - } - } - if badCharCount >= 2 && badCharCount*5 >= doubleByteCharCount { - return 0 - } - } - - if doubleByteCharCount <= 10 && badCharCount == 0 { - if doubleByteCharCount == 0 && totalCharCount < 10 { - return 0 - } else { - return 10 - } - } - - if doubleByteCharCount < 20*badCharCount { - return 0 - } - if r.commonChars == nil { - confidence := 30 + doubleByteCharCount - 20*badCharCount - if confidence > 100 { - confidence = 100 - } - return confidence - } - maxVal := math.Log(float64(doubleByteCharCount) / 4) - scaleFactor := 90 / maxVal - confidence := int(math.Log(float64(commonCharCount)+1)*scaleFactor + 10) - if confidence > 100 { - confidence = 100 - } - if confidence < 0 { - confidence = 0 - } - return confidence -} - -func binarySearch(l []uint16, c uint16) bool { - start := 0 - end := len(l) - 1 - for start <= end { - mid := (start + end) / 2 - if c == l[mid] { - return true - } else if c < l[mid] { - end = mid - 1 - } else { - start = mid + 1 - } - } - return false -} - -var eobError = errors.New("End of input buffer") -var badCharError = errors.New("Decode a bad char") - -type charDecoder_sjis struct { -} - -func (charDecoder_sjis) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { - if len(input) == 0 { - return 0, nil, eobError - } - first := input[0] - c = uint16(first) - remain = input[1:] - if first <= 0x7F || (first > 0xA0 && first <= 0xDF) { - return - } - if len(remain) == 0 { - return c, remain, badCharError - } - second := remain[0] - remain = remain[1:] - c = c<<8 | uint16(second) - if (second >= 0x40 && second <= 0x7F) || (second >= 0x80 && second <= 0xFE) { - } else { - err = badCharError - } - return -} - -var commonChars_sjis = []uint16{ - 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0, - 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5, - 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc, - 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341, - 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389, - 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa, -} - -func newRecognizer_sjis() *recognizerMultiByte { - return &recognizerMultiByte{ - "Shift_JIS", - "ja", - charDecoder_sjis{}, - commonChars_sjis, - } -} - -type charDecoder_euc struct { -} - -func (charDecoder_euc) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { - if len(input) == 0 { - return 0, nil, eobError - } - first := input[0] - remain = input[1:] - c = uint16(first) - if first <= 0x8D { - return uint16(first), remain, nil - } - if len(remain) == 0 { - return 0, nil, eobError - } - second := remain[0] - remain = remain[1:] - c = c<<8 | uint16(second) - if first >= 0xA1 && first <= 0xFE { - if second < 0xA1 { - err = badCharError - } - return - } - if first == 0x8E { - if second < 0xA1 { - err = badCharError - } - return - } - if first == 0x8F { - if len(remain) == 0 { - return 0, nil, eobError - } - third := remain[0] - remain = remain[1:] - c = c<<0 | uint16(third) - if third < 0xa1 { - err = badCharError - } - } - return -} - -var commonChars_euc_jp = []uint16{ - 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2, - 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3, - 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4, - 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de, - 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef, - 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af, - 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7, - 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1, - 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee, - 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1, -} - -var commonChars_euc_kr = []uint16{ - 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc, - 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9, - 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce, - 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce, - 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba, - 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee, - 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7, - 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6, - 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6, - 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad, -} - -func newRecognizer_euc_jp() *recognizerMultiByte { - return &recognizerMultiByte{ - "EUC-JP", - "ja", - charDecoder_euc{}, - commonChars_euc_jp, - } -} - -func newRecognizer_euc_kr() *recognizerMultiByte { - return &recognizerMultiByte{ - "EUC-KR", - "ko", - charDecoder_euc{}, - commonChars_euc_kr, - } -} - -type charDecoder_big5 struct { -} - -func (charDecoder_big5) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { - if len(input) == 0 { - return 0, nil, eobError - } - first := input[0] - remain = input[1:] - c = uint16(first) - if first <= 0x7F || first == 0xFF { - return - } - if len(remain) == 0 { - return c, nil, eobError - } - second := remain[0] - remain = remain[1:] - c = c<<8 | uint16(second) - if second < 0x40 || second == 0x7F || second == 0xFF { - err = badCharError - } - return -} - -var commonChars_big5 = []uint16{ - 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446, - 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3, - 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548, - 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8, - 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da, - 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3, - 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59, - 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c, - 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44, - 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f, -} - -func newRecognizer_big5() *recognizerMultiByte { - return &recognizerMultiByte{ - "Big5", - "zh", - charDecoder_big5{}, - commonChars_big5, - } -} - -type charDecoder_gb_18030 struct { -} - -func (charDecoder_gb_18030) DecodeOneChar(input []byte) (c uint16, remain []byte, err error) { - if len(input) == 0 { - return 0, nil, eobError - } - first := input[0] - remain = input[1:] - c = uint16(first) - if first <= 0x80 { - return - } - if len(remain) == 0 { - return 0, nil, eobError - } - second := remain[0] - remain = remain[1:] - c = c<<8 | uint16(second) - if first >= 0x81 && first <= 0xFE { - if (second >= 0x40 && second <= 0x7E) || (second >= 0x80 && second <= 0xFE) { - return - } - - if second >= 0x30 && second <= 0x39 { - if len(remain) == 0 { - return 0, nil, eobError - } - third := remain[0] - remain = remain[1:] - if third >= 0x81 && third <= 0xFE { - if len(remain) == 0 { - return 0, nil, eobError - } - fourth := remain[0] - remain = remain[1:] - if fourth >= 0x30 && fourth <= 0x39 { - c = c<<16 | uint16(third)<<8 | uint16(fourth) - return - } - } - } - err = badCharError - } - return -} - -var commonChars_gb_18030 = []uint16{ - 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac, - 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4, - 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4, - 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6, - 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6, - 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7, - 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7, - 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5, - 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2, - 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0, -} - -func newRecognizer_gb_18030() *recognizerMultiByte { - return &recognizerMultiByte{ - "GB-18030", - "zh", - charDecoder_gb_18030{}, - commonChars_gb_18030, - } -} diff --git a/vendor/github.com/saintfish/chardet/recognizer.go b/vendor/github.com/saintfish/chardet/recognizer.go deleted file mode 100644 index 1bf8461..0000000 --- a/vendor/github.com/saintfish/chardet/recognizer.go +++ /dev/null @@ -1,83 +0,0 @@ -package chardet - -type recognizer interface { - Match(*recognizerInput) recognizerOutput -} - -type recognizerOutput Result - -type recognizerInput struct { - raw []byte - input []byte - tagStripped bool - byteStats []int - hasC1Bytes bool -} - -func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput { - input, stripped := mayStripInput(raw, stripTag) - byteStats := computeByteStats(input) - return &recognizerInput{ - raw: raw, - input: input, - tagStripped: stripped, - byteStats: byteStats, - hasC1Bytes: computeHasC1Bytes(byteStats), - } -} - -func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) { - const inputBufferSize = 8192 - out = make([]byte, 0, inputBufferSize) - var badTags, openTags int32 - var inMarkup bool = false - stripped = false - if stripTag { - stripped = true - for _, c := range raw { - if c == '<' { - if inMarkup { - badTags += 1 - } - inMarkup = true - openTags += 1 - } - if !inMarkup { - out = append(out, c) - if len(out) >= inputBufferSize { - break - } - } - if c == '>' { - inMarkup = false - } - } - } - if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) { - limit := len(raw) - if limit > inputBufferSize { - limit = inputBufferSize - } - out = make([]byte, limit) - copy(out, raw[:limit]) - stripped = false - } - return -} - -func computeByteStats(input []byte) []int { - r := make([]int, 256) - for _, c := range input { - r[c] += 1 - } - return r -} - -func computeHasC1Bytes(byteStats []int) bool { - for _, count := range byteStats[0x80 : 0x9F+1] { - if count > 0 { - return true - } - } - return false -} diff --git a/vendor/github.com/saintfish/chardet/single_byte.go b/vendor/github.com/saintfish/chardet/single_byte.go deleted file mode 100644 index efe41c9..0000000 --- a/vendor/github.com/saintfish/chardet/single_byte.go +++ /dev/null @@ -1,882 +0,0 @@ -package chardet - -// Recognizer for single byte charset family -type recognizerSingleByte struct { - charset string - hasC1ByteCharset string - language string - charMap *[256]byte - ngram *[64]uint32 -} - -func (r *recognizerSingleByte) Match(input *recognizerInput) recognizerOutput { - var charset string = r.charset - if input.hasC1Bytes && len(r.hasC1ByteCharset) > 0 { - charset = r.hasC1ByteCharset - } - return recognizerOutput{ - Charset: charset, - Language: r.language, - Confidence: r.parseNgram(input.input), - } -} - -type ngramState struct { - ngram uint32 - ignoreSpace bool - ngramCount, ngramHit uint32 - table *[64]uint32 -} - -func newNgramState(table *[64]uint32) *ngramState { - return &ngramState{ - ngram: 0, - ignoreSpace: false, - ngramCount: 0, - ngramHit: 0, - table: table, - } -} - -func (s *ngramState) AddByte(b byte) { - const ngramMask = 0xFFFFFF - if !(b == 0x20 && s.ignoreSpace) { - s.ngram = ((s.ngram << 8) | uint32(b)) & ngramMask - s.ignoreSpace = (s.ngram == 0x20) - s.ngramCount++ - if s.lookup() { - s.ngramHit++ - } - } - s.ignoreSpace = (b == 0x20) -} - -func (s *ngramState) HitRate() float32 { - if s.ngramCount == 0 { - return 0 - } - return float32(s.ngramHit) / float32(s.ngramCount) -} - -func (s *ngramState) lookup() bool { - var index int - if s.table[index+32] <= s.ngram { - index += 32 - } - if s.table[index+16] <= s.ngram { - index += 16 - } - if s.table[index+8] <= s.ngram { - index += 8 - } - if s.table[index+4] <= s.ngram { - index += 4 - } - if s.table[index+2] <= s.ngram { - index += 2 - } - if s.table[index+1] <= s.ngram { - index += 1 - } - if s.table[index] > s.ngram { - index -= 1 - } - if index < 0 || s.table[index] != s.ngram { - return false - } - return true -} - -func (r *recognizerSingleByte) parseNgram(input []byte) int { - state := newNgramState(r.ngram) - for _, inChar := range input { - c := r.charMap[inChar] - if c != 0 { - state.AddByte(c) - } - } - state.AddByte(0x20) - rate := state.HitRate() - if rate > 0.33 { - return 98 - } - return int(rate * 300) -} - -var charMap_8859_1 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, - 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, -} - -var ngrams_8859_1_en = [64]uint32{ - 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F, - 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74, - 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420, - 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320, -} - -var ngrams_8859_1_da = [64]uint32{ - 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620, - 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320, - 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520, - 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572, -} - -var ngrams_8859_1_de = [64]uint32{ - 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F, - 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220, - 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465, - 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572, -} - -var ngrams_8859_1_es = [64]uint32{ - 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, - 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C, - 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064, - 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20, -} - -var ngrams_8859_1_fr = [64]uint32{ - 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E, - 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20, - 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420, - 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220, -} - -var ngrams_8859_1_it = [64]uint32{ - 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073, - 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220, - 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20, - 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F, -} - -var ngrams_8859_1_nl = [64]uint32{ - 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665, - 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E, - 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F, - 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F, -} - -var ngrams_8859_1_no = [64]uint32{ - 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469, - 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474, - 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65, - 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572, -} - -var ngrams_8859_1_pt = [64]uint32{ - 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365, - 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20, - 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065, - 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F, -} - -var ngrams_8859_1_sv = [64]uint32{ - 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469, - 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220, - 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20, - 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220, -} - -func newRecognizer_8859_1(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-1", - hasC1ByteCharset: "windows-1252", - language: language, - charMap: &charMap_8859_1, - ngram: ngram, - } -} - -func newRecognizer_8859_1_en() *recognizerSingleByte { - return newRecognizer_8859_1("en", &ngrams_8859_1_en) -} -func newRecognizer_8859_1_da() *recognizerSingleByte { - return newRecognizer_8859_1("da", &ngrams_8859_1_da) -} -func newRecognizer_8859_1_de() *recognizerSingleByte { - return newRecognizer_8859_1("de", &ngrams_8859_1_de) -} -func newRecognizer_8859_1_es() *recognizerSingleByte { - return newRecognizer_8859_1("es", &ngrams_8859_1_es) -} -func newRecognizer_8859_1_fr() *recognizerSingleByte { - return newRecognizer_8859_1("fr", &ngrams_8859_1_fr) -} -func newRecognizer_8859_1_it() *recognizerSingleByte { - return newRecognizer_8859_1("it", &ngrams_8859_1_it) -} -func newRecognizer_8859_1_nl() *recognizerSingleByte { - return newRecognizer_8859_1("nl", &ngrams_8859_1_nl) -} -func newRecognizer_8859_1_no() *recognizerSingleByte { - return newRecognizer_8859_1("no", &ngrams_8859_1_no) -} -func newRecognizer_8859_1_pt() *recognizerSingleByte { - return newRecognizer_8859_1("pt", &ngrams_8859_1_pt) -} -func newRecognizer_8859_1_sv() *recognizerSingleByte { - return newRecognizer_8859_1("sv", &ngrams_8859_1_sv) -} - -var charMap_8859_2 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20, - 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, - 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, - 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, -} - -var ngrams_8859_2_cs = [64]uint32{ - 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F, - 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465, - 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865, - 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564, -} - -var ngrams_8859_2_hu = [64]uint32{ - 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69, - 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20, - 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061, - 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320, -} - -var ngrams_8859_2_pl = [64]uint32{ - 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779, - 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20, - 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769, - 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720, -} - -var ngrams_8859_2_ro = [64]uint32{ - 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69, - 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070, - 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72, - 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20, -} - -func newRecognizer_8859_2(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-2", - hasC1ByteCharset: "windows-1250", - language: language, - charMap: &charMap_8859_2, - ngram: ngram, - } -} - -func newRecognizer_8859_2_cs() *recognizerSingleByte { - return newRecognizer_8859_1("cs", &ngrams_8859_2_cs) -} -func newRecognizer_8859_2_hu() *recognizerSingleByte { - return newRecognizer_8859_1("hu", &ngrams_8859_2_hu) -} -func newRecognizer_8859_2_pl() *recognizerSingleByte { - return newRecognizer_8859_1("pl", &ngrams_8859_2_pl) -} -func newRecognizer_8859_2_ro() *recognizerSingleByte { - return newRecognizer_8859_1("ro", &ngrams_8859_2_ro) -} - -var charMap_8859_5 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF, -} - -var ngrams_8859_5_ru = [64]uint32{ - 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE, - 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD, - 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2, - 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520, -} - -func newRecognizer_8859_5(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-5", - language: language, - charMap: &charMap_8859_5, - ngram: ngram, - } -} - -func newRecognizer_8859_5_ru() *recognizerSingleByte { - return newRecognizer_8859_5("ru", &ngrams_8859_5_ru) -} - -var charMap_8859_6 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, -} - -var ngrams_8859_6_ar = [64]uint32{ - 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8, - 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1, - 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20, - 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620, -} - -func newRecognizer_8859_6(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-6", - language: language, - charMap: &charMap_8859_6, - ngram: ngram, - } -} - -func newRecognizer_8859_6_ar() *recognizerSingleByte { - return newRecognizer_8859_6("ar", &ngrams_8859_6_ar) -} - -var charMap_8859_7 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20, - 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE, - 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20, -} - -var ngrams_8859_7_el = [64]uint32{ - 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7, - 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120, - 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5, - 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20, -} - -func newRecognizer_8859_7(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-7", - hasC1ByteCharset: "windows-1253", - language: language, - charMap: &charMap_8859_7, - ngram: ngram, - } -} - -func newRecognizer_8859_7_el() *recognizerSingleByte { - return newRecognizer_8859_7("el", &ngrams_8859_7_el) -} - -var charMap_8859_8 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20, -} - -var ngrams_8859_8_I_he = [64]uint32{ - 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0, - 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4, - 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE, - 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9, -} - -var ngrams_8859_8_he = [64]uint32{ - 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0, - 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC, - 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920, - 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9, -} - -func newRecognizer_8859_8(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-8", - hasC1ByteCharset: "windows-1255", - language: language, - charMap: &charMap_8859_8, - ngram: ngram, - } -} - -func newRecognizer_8859_8_I_he() *recognizerSingleByte { - r := newRecognizer_8859_8("he", &ngrams_8859_8_I_he) - r.charset = "ISO-8859-8-I" - return r -} - -func newRecognizer_8859_8_he() *recognizerSingleByte { - return newRecognizer_8859_8("he", &ngrams_8859_8_he) -} - -var charMap_8859_9 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, - 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, -} - -var ngrams_8859_9_tr = [64]uint32{ - 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961, - 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062, - 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062, - 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD, -} - -func newRecognizer_8859_9(language string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "ISO-8859-9", - hasC1ByteCharset: "windows-1254", - language: language, - charMap: &charMap_8859_9, - ngram: ngram, - } -} - -func newRecognizer_8859_9_tr() *recognizerSingleByte { - return newRecognizer_8859_9("tr", &ngrams_8859_9_tr) -} - -var charMap_windows_1256 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, - 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F, - 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20, - 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF, -} - -var ngrams_windows_1256 = [64]uint32{ - 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8, - 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD, - 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20, - 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420, -} - -func newRecognizer_windows_1256() *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "windows-1256", - language: "ar", - charMap: &charMap_windows_1256, - ngram: &ngrams_windows_1256, - } -} - -var charMap_windows_1251 = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, - 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F, - 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20, - 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF, - 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20, - 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, - 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, - 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, - 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, - 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, -} - -var ngrams_windows_1251 = [64]uint32{ - 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE, - 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED, - 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2, - 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520, -} - -func newRecognizer_windows_1251() *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "windows-1251", - language: "ar", - charMap: &charMap_windows_1251, - ngram: &ngrams_windows_1251, - } -} - -var charMap_KOI8_R = [256]byte{ - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, - 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, - 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, - 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, - 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, -} - -var ngrams_KOI8_R = [64]uint32{ - 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1, - 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE, - 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1, - 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF, -} - -func newRecognizer_KOI8_R() *recognizerSingleByte { - return &recognizerSingleByte{ - charset: "KOI8-R", - language: "ru", - charMap: &charMap_KOI8_R, - ngram: &ngrams_KOI8_R, - } -} - -var charMap_IBM424_he = [256]byte{ - /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ - /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 4- */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 5- */ 0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 7- */ 0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40, - /* 8- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 9- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* B- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, -} - -var ngrams_IBM424_he_rtl = [64]uint32{ - 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, - 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, - 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056, - 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069, -} - -var ngrams_IBM424_he_ltr = [64]uint32{ - 0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141, - 0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054, - 0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940, - 0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651, -} - -func newRecognizer_IBM424_he(charset string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: charset, - language: "he", - charMap: &charMap_IBM424_he, - ngram: ngram, - } -} - -func newRecognizer_IBM424_he_rtl() *recognizerSingleByte { - return newRecognizer_IBM424_he("IBM424_rtl", &ngrams_IBM424_he_rtl) -} - -func newRecognizer_IBM424_he_ltr() *recognizerSingleByte { - return newRecognizer_IBM424_he("IBM424_ltr", &ngrams_IBM424_he_ltr) -} - -var charMap_IBM420_ar = [256]byte{ - /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ - /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 1- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 2- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 3- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 4- */ 0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 5- */ 0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 6- */ 0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 7- */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, - /* 8- */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, - /* 9- */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, - /* A- */ 0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, - /* B- */ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, - /* C- */ 0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF, - /* D- */ 0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, - /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, - /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, -} - -var ngrams_IBM420_ar_rtl = [64]uint32{ - 0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158, - 0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB, - 0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40, - 0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40, -} - -var ngrams_IBM420_ar_ltr = [64]uint32{ - 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF, - 0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD, - 0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156, - 0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156, -} - -func newRecognizer_IBM420_ar(charset string, ngram *[64]uint32) *recognizerSingleByte { - return &recognizerSingleByte{ - charset: charset, - language: "ar", - charMap: &charMap_IBM420_ar, - ngram: ngram, - } -} - -func newRecognizer_IBM420_ar_rtl() *recognizerSingleByte { - return newRecognizer_IBM420_ar("IBM420_rtl", &ngrams_IBM420_ar_rtl) -} - -func newRecognizer_IBM420_ar_ltr() *recognizerSingleByte { - return newRecognizer_IBM420_ar("IBM420_ltr", &ngrams_IBM420_ar_ltr) -} diff --git a/vendor/github.com/saintfish/chardet/unicode.go b/vendor/github.com/saintfish/chardet/unicode.go deleted file mode 100644 index 6f9fa9e..0000000 --- a/vendor/github.com/saintfish/chardet/unicode.go +++ /dev/null @@ -1,103 +0,0 @@ -package chardet - -import ( - "bytes" -) - -var ( - utf16beBom = []byte{0xFE, 0xFF} - utf16leBom = []byte{0xFF, 0xFE} - utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF} - utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00} -) - -type recognizerUtf16be struct { -} - -func newRecognizer_utf16be() *recognizerUtf16be { - return &recognizerUtf16be{} -} - -func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: "UTF-16BE", - } - if bytes.HasPrefix(input.raw, utf16beBom) { - output.Confidence = 100 - } - return -} - -type recognizerUtf16le struct { -} - -func newRecognizer_utf16le() *recognizerUtf16le { - return &recognizerUtf16le{} -} - -func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: "UTF-16LE", - } - if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) { - output.Confidence = 100 - } - return -} - -type recognizerUtf32 struct { - name string - bom []byte - decodeChar func(input []byte) uint32 -} - -func decodeUtf32be(input []byte) uint32 { - return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3]) -} - -func decodeUtf32le(input []byte) uint32 { - return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0]) -} - -func newRecognizer_utf32be() *recognizerUtf32 { - return &recognizerUtf32{ - "UTF-32BE", - utf32beBom, - decodeUtf32be, - } -} - -func newRecognizer_utf32le() *recognizerUtf32 { - return &recognizerUtf32{ - "UTF-32LE", - utf32leBom, - decodeUtf32le, - } -} - -func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: r.name, - } - hasBom := bytes.HasPrefix(input.raw, r.bom) - var numValid, numInvalid uint32 - for b := input.raw; len(b) >= 4; b = b[4:] { - if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) { - numInvalid++ - } else { - numValid++ - } - } - if hasBom && numInvalid == 0 { - output.Confidence = 100 - } else if hasBom && numValid > numInvalid*10 { - output.Confidence = 80 - } else if numValid > 3 && numInvalid == 0 { - output.Confidence = 100 - } else if numValid > 0 && numInvalid == 0 { - output.Confidence = 80 - } else if numValid > numInvalid*10 { - output.Confidence = 25 - } - return -} diff --git a/vendor/github.com/saintfish/chardet/utf8.go b/vendor/github.com/saintfish/chardet/utf8.go deleted file mode 100644 index ae036ad..0000000 --- a/vendor/github.com/saintfish/chardet/utf8.go +++ /dev/null @@ -1,71 +0,0 @@ -package chardet - -import ( - "bytes" -) - -var utf8Bom = []byte{0xEF, 0xBB, 0xBF} - -type recognizerUtf8 struct { -} - -func newRecognizer_utf8() *recognizerUtf8 { - return &recognizerUtf8{} -} - -func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: "UTF-8", - } - hasBom := bytes.HasPrefix(input.raw, utf8Bom) - inputLen := len(input.raw) - var numValid, numInvalid uint32 - var trailBytes uint8 - for i := 0; i < inputLen; i++ { - c := input.raw[i] - if c&0x80 == 0 { - continue - } - if c&0xE0 == 0xC0 { - trailBytes = 1 - } else if c&0xF0 == 0xE0 { - trailBytes = 2 - } else if c&0xF8 == 0xF0 { - trailBytes = 3 - } else { - numInvalid++ - if numInvalid > 5 { - break - } - trailBytes = 0 - } - - for i++; i < inputLen; i++ { - c = input.raw[i] - if c&0xC0 != 0x80 { - numInvalid++ - break - } - if trailBytes--; trailBytes == 0 { - numValid++ - break - } - } - } - - if hasBom && numInvalid == 0 { - output.Confidence = 100 - } else if hasBom && numValid > numInvalid*10 { - output.Confidence = 80 - } else if numValid > 3 && numInvalid == 0 { - output.Confidence = 100 - } else if numValid > 0 && numInvalid == 0 { - output.Confidence = 80 - } else if numValid == 0 && numInvalid == 0 { - // Plain ASCII - output.Confidence = 10 - } else if numValid > numInvalid*10 { - output.Confidence = 25 - } - return -} diff --git a/vendor/github.com/temoto/robotstxt/.gitignore b/vendor/github.com/temoto/robotstxt/.gitignore deleted file mode 100644 index 6205f9e..0000000 --- a/vendor/github.com/temoto/robotstxt/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -*.cgo?.* -*.o -*.so -*.sublime-* -*.zip -.DS_Store -.idea/ -.tags* -_cgo_* -_gofuzz/crashers/ -_gofuzz/suppressions/ -_obj -_test -coverage.txt -robots.txt-check/robots.txt-check diff --git a/vendor/github.com/temoto/robotstxt/.golangci.yml b/vendor/github.com/temoto/robotstxt/.golangci.yml deleted file mode 100644 index 24e5858..0000000 --- a/vendor/github.com/temoto/robotstxt/.golangci.yml +++ /dev/null @@ -1,20 +0,0 @@ -linters: - enable: - - goconst - - gofmt - - gosec - - maligned - - prealloc - - staticcheck - disable: - - deadcode - - structcheck - - varcheck - -linters-settings: - gofmt: - simplify: true - govet: - check-shadowing: true - maligned: - suggest-new: true diff --git a/vendor/github.com/temoto/robotstxt/.travis.yml b/vendor/github.com/temoto/robotstxt/.travis.yml deleted file mode 100644 index ad90dac..0000000 --- a/vendor/github.com/temoto/robotstxt/.travis.yml +++ /dev/null @@ -1,30 +0,0 @@ -cache: - go: true - directories: - - $HOME/.cache - - $HOME/bin - - $HOME/gopath/pkg/mod -language: go -go: -- 1.11 -- 1.12 -- 1.13 -- 1.14 -- 1.x -- master -install: true -script: GO111MODULE=on go test -race - -matrix: - include: - - go: 1.x - env: task=coverage - script: GO111MODULE=on go test -race -covermode=atomic -coverprofile=coverage.txt - after_success: bash <(curl -s https://codecov.io/bash) - - go: 1.x - env: task=bench - script: GO111MODULE=on ./script/bench - - go: 1.x - install: curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s -- -b $HOME/bin v1.19.1 - env: task=clean - script: GO111MODULE=on ./script/clean diff --git a/vendor/github.com/temoto/robotstxt/BUILD.bazel b/vendor/github.com/temoto/robotstxt/BUILD.bazel deleted file mode 100644 index adae9a7..0000000 --- a/vendor/github.com/temoto/robotstxt/BUILD.bazel +++ /dev/null @@ -1,13 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "robotstxt", - srcs = [ - "parser.go", - "robotstxt.go", - "scanner.go", - ], - importmap = "peridot.resf.org/vendor/github.com/temoto/robotstxt", - importpath = "github.com/temoto/robotstxt", - visibility = ["//visibility:public"], -) diff --git a/vendor/github.com/temoto/robotstxt/LICENSE b/vendor/github.com/temoto/robotstxt/LICENSE deleted file mode 100644 index c125145..0000000 --- a/vendor/github.com/temoto/robotstxt/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) 2010 Sergey Shepelev - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/vendor/github.com/temoto/robotstxt/README.rst b/vendor/github.com/temoto/robotstxt/README.rst deleted file mode 100644 index 92f1ae1..0000000 --- a/vendor/github.com/temoto/robotstxt/README.rst +++ /dev/null @@ -1,115 +0,0 @@ -What -==== - -This is a robots.txt exclusion protocol implementation for Go language (golang). - - -Build -===== - -To build and run tests run `go test` in source directory. - - -Contribute -========== - -Warm welcome. - -* If desired, add your name in README.rst, section Who. -* Run `script/test && script/clean && echo ok` -* You can ignore linter warnings, but everything else must pass. -* Send your change as pull request or just a regular patch to current maintainer (see section Who). - -Thank you. - - -Usage -===== - -As usual, no special installation is required, just - - import "github.com/temoto/robotstxt" - -run `go get` and you're ready. - -1. Parse -^^^^^^^^ - -First of all, you need to parse robots.txt data. You can do it with -functions `FromBytes(body []byte) (*RobotsData, error)` or same for `string`:: - - robots, err := robotstxt.FromBytes([]byte("User-agent: *\nDisallow:")) - robots, err := robotstxt.FromString("User-agent: *\nDisallow:") - -As of 2012-10-03, `FromBytes` is the most efficient method, everything else -is a wrapper for this core function. - -There are few convenient constructors for various purposes: - -* `FromResponse(*http.Response) (*RobotsData, error)` to init robots data -from HTTP response. It *does not* call `response.Body.Close()`:: - - robots, err := robotstxt.FromResponse(resp) - resp.Body.Close() - if err != nil { - log.Println("Error parsing robots.txt:", err.Error()) - } - -* `FromStatusAndBytes(statusCode int, body []byte) (*RobotsData, error)` or -`FromStatusAndString` if you prefer to read bytes (string) yourself. -Passing status code applies following logic in line with Google's interpretation -of robots.txt files: - - * status 2xx -> parse body with `FromBytes` and apply rules listed there. - * status 4xx -> allow all (even 401/403, as recommended by Google). - * other (5xx) -> disallow all, consider this a temporary unavailability. - -2. Query -^^^^^^^^ - -Parsing robots.txt content builds a kind of logic database, which you can -query with `(r *RobotsData) TestAgent(url, agent string) (bool)`. - -Explicit passing of agent is useful if you want to query for different agents. For -single agent users there is an efficient option: `RobotsData.FindGroup(userAgent string)` -returns a structure with `.Test(path string)` method and `.CrawlDelay time.Duration`. - -Simple query with explicit user agent. Each call will scan all rules. - -:: - - allow := robots.TestAgent("/", "FooBot") - -Or query several paths against same user agent for performance. - -:: - - group := robots.FindGroup("BarBot") - group.Test("/") - group.Test("/download.mp3") - group.Test("/news/article-2012-1") - - -Who -=== - -Honorable contributors (in undefined order): - - * Ilya Grigorik (igrigorik) - * Martin Angers (PuerkitoBio) - * Micha Gorelick (mynameisfiber) - -Initial commit and other: Sergey Shepelev temotor@gmail.com - - -Flair -===== - -.. image:: https://travis-ci.org/temoto/robotstxt.svg?branch=master - :target: https://travis-ci.org/temoto/robotstxt - -.. image:: https://codecov.io/gh/temoto/robotstxt/branch/master/graph/badge.svg - :target: https://codecov.io/gh/temoto/robotstxt - -.. image:: https://goreportcard.com/badge/github.com/temoto/robotstxt - :target: https://goreportcard.com/report/github.com/temoto/robotstxt diff --git a/vendor/github.com/temoto/robotstxt/codecov.yml b/vendor/github.com/temoto/robotstxt/codecov.yml deleted file mode 100644 index b80be28..0000000 --- a/vendor/github.com/temoto/robotstxt/codecov.yml +++ /dev/null @@ -1,2 +0,0 @@ -codecov: - token: 6bf9c7eb-69ff-4b74-8464-e2fb452d0f04 diff --git a/vendor/github.com/temoto/robotstxt/fuzz.go b/vendor/github.com/temoto/robotstxt/fuzz.go deleted file mode 100644 index de4b058..0000000 --- a/vendor/github.com/temoto/robotstxt/fuzz.go +++ /dev/null @@ -1,29 +0,0 @@ -// +build gofuzz - -package robotstxt - -import "testing/quick" - -func Fuzz(data []byte) int { - r, err := FromBytes(data) - if err != nil { - if r != nil { - panic("r != nil on error") - } - return 0 - } - - // FindGroup must never return nil - f1 := func(agent string) bool { return r.FindGroup(agent) != nil } - if err := quick.Check(f1, nil); err != nil { - panic(err) - } - - // just check TestAgent doesn't panic - f2 := func(path, agent string) bool { r.TestAgent(path, agent); return true } - if err := quick.Check(f2, nil); err != nil { - panic(err) - } - - return 1 -} diff --git a/vendor/github.com/temoto/robotstxt/go.mod b/vendor/github.com/temoto/robotstxt/go.mod deleted file mode 100644 index 1a5ea1b..0000000 --- a/vendor/github.com/temoto/robotstxt/go.mod +++ /dev/null @@ -1,5 +0,0 @@ -module github.com/temoto/robotstxt - -go 1.11 - -require github.com/stretchr/testify v1.3.0 diff --git a/vendor/github.com/temoto/robotstxt/go.sum b/vendor/github.com/temoto/robotstxt/go.sum deleted file mode 100644 index 4347755..0000000 --- a/vendor/github.com/temoto/robotstxt/go.sum +++ /dev/null @@ -1,7 +0,0 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= diff --git a/vendor/github.com/temoto/robotstxt/parser.go b/vendor/github.com/temoto/robotstxt/parser.go deleted file mode 100644 index 46eb6b1..0000000 --- a/vendor/github.com/temoto/robotstxt/parser.go +++ /dev/null @@ -1,271 +0,0 @@ -package robotstxt - -// Comments explaining the logic are taken from either the google's spec: -// https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt -// -// or the Wikipedia's entry on robots.txt: -// http://en.wikipedia.org/wiki/Robots.txt - -import ( - "fmt" - "io" - "math" - "regexp" - "strconv" - "strings" - "time" -) - -type lineType uint - -const ( - lIgnore lineType = iota - lUnknown - lUserAgent - lAllow - lDisallow - lCrawlDelay - lSitemap - lHost -) - -type parser struct { - tokens []string - pos int -} - -type lineInfo struct { - t lineType // Type of line key - k string // String representation of the type of key - vs string // String value of the key - vf float64 // Float value of the key - vr *regexp.Regexp // Regexp value of the key -} - -func newParser(tokens []string) *parser { - return &parser{tokens: tokens} -} - -func parseGroupMap(groups map[string]*Group, agents []string, fun func(*Group)) { - var g *Group - for _, a := range agents { - if g = groups[a]; g == nil { - g = new(Group) - groups[a] = g - } - fun(g) - } -} - -func (p *parser) parseAll() (groups map[string]*Group, host string, sitemaps []string, errs []error) { - groups = make(map[string]*Group, 16) - agents := make([]string, 0, 4) - isEmptyGroup := true - - // Reset internal fields, tokens are assigned at creation time, never change - p.pos = 0 - - for { - if li, err := p.parseLine(); err != nil { - if err == io.EOF { - break - } - errs = append(errs, err) - } else { - switch li.t { - case lUserAgent: - // Two successive user-agent lines are part of the same group. - if !isEmptyGroup { - // End previous group - agents = make([]string, 0, 4) - } - if len(agents) == 0 { - isEmptyGroup = true - } - agents = append(agents, li.vs) - - case lDisallow: - // Error if no current group - if len(agents) == 0 { - errs = append(errs, fmt.Errorf("Disallow before User-agent at token #%d.", p.pos)) - } else { - isEmptyGroup = false - var r *rule - if li.vr != nil { - r = &rule{"", false, li.vr} - } else { - r = &rule{li.vs, false, nil} - } - parseGroupMap(groups, agents, func(g *Group) { g.rules = append(g.rules, r) }) - } - - case lAllow: - // Error if no current group - if len(agents) == 0 { - errs = append(errs, fmt.Errorf("Allow before User-agent at token #%d.", p.pos)) - } else { - isEmptyGroup = false - var r *rule - if li.vr != nil { - r = &rule{"", true, li.vr} - } else { - r = &rule{li.vs, true, nil} - } - parseGroupMap(groups, agents, func(g *Group) { g.rules = append(g.rules, r) }) - } - - case lHost: - host = li.vs - - case lSitemap: - sitemaps = append(sitemaps, li.vs) - - case lCrawlDelay: - if len(agents) == 0 { - errs = append(errs, fmt.Errorf("Crawl-delay before User-agent at token #%d.", p.pos)) - } else { - isEmptyGroup = false - delay := time.Duration(li.vf * float64(time.Second)) - parseGroupMap(groups, agents, func(g *Group) { g.CrawlDelay = delay }) - } - } - } - } - return -} - -func (p *parser) parseLine() (li *lineInfo, err error) { - t1, ok1 := p.popToken() - if !ok1 { - // proper EOF - return nil, io.EOF - } - - t2, ok2 := p.peekToken() - if !ok2 { - // EOF, no value associated with the token, so ignore token and return - return nil, io.EOF - } - - // Helper closure for all string-based tokens, common behaviour: - // - Consume t2 token - // - If empty, return unknown line info - // - Otherwise return the specified line info - returnStringVal := func(t lineType) (*lineInfo, error) { - p.popToken() - if t2 != "" { - return &lineInfo{t: t, k: t1, vs: t2}, nil - } - return &lineInfo{t: lIgnore}, nil - } - - // Helper closure for all path tokens (allow/disallow), common behaviour: - // - Consume t2 token - // - If empty, return unknown line info - // - Otherwise, normalize the path (add leading "/" if missing, remove trailing "*") - // - Detect if wildcards are present, if so, compile into a regexp - // - Return the specified line info - returnPathVal := func(t lineType) (*lineInfo, error) { - p.popToken() - if t2 != "" { - if !strings.HasPrefix(t2, "*") && !strings.HasPrefix(t2, "/") { - t2 = "/" + t2 - } - t2 = strings.TrimRightFunc(t2, isAsterisk) - // From google's spec: - // Google, Bing, Yahoo, and Ask support a limited form of - // "wildcards" for path values. These are: - // * designates 0 or more instances of any valid character - // $ designates the end of the URL - if strings.ContainsAny(t2, "*$") { - // Must compile a regexp, this is a pattern. - // Escape string before compile. - t2 = regexp.QuoteMeta(t2) - t2 = strings.Replace(t2, `\*`, `.*`, -1) - t2 = strings.Replace(t2, `\$`, `$`, -1) - if r, e := regexp.Compile(t2); e != nil { - return nil, e - } else { - return &lineInfo{t: t, k: t1, vr: r}, nil - } - } else { - // Simple string path - return &lineInfo{t: t, k: t1, vs: t2}, nil - } - } - return &lineInfo{t: lIgnore}, nil - } - - switch strings.ToLower(t1) { - case tokEOL: - // Don't consume t2 and continue parsing - return &lineInfo{t: lIgnore}, nil - - case "user-agent", "useragent": - // From google's spec: - // Handling of elements with simple errors / typos (eg "useragent" - // instead of "user-agent") is undefined and may be interpreted as correct - // directives by some user-agents. - // The user-agent is non-case-sensitive. - t2 = strings.ToLower(t2) - return returnStringVal(lUserAgent) - - case "disallow": - // From google's spec: - // When no path is specified, the directive is ignored (so an empty Disallow - // CAN be an allow, since allow is the default. The actual result depends - // on the other rules in the group). - return returnPathVal(lDisallow) - - case "allow": - // From google's spec: - // When no path is specified, the directive is ignored. - return returnPathVal(lAllow) - - case "host": - // Host directive to specify main site mirror - // Read more: https://help.yandex.com/webmaster/controlling-robot/robots-txt.xml#host - return returnStringVal(lHost) - - case "sitemap": - // Non-group field, applies to the host as a whole, not to a specific user-agent - return returnStringVal(lSitemap) - - case "crawl-delay", "crawldelay": - // From http://en.wikipedia.org/wiki/Robots_exclusion_standard#Nonstandard_extensions - // Several major crawlers support a Crawl-delay parameter, set to the - // number of seconds to wait between successive requests to the same server. - p.popToken() - if cd, e := strconv.ParseFloat(t2, 64); e != nil { - return nil, e - } else if cd < 0 || math.IsInf(cd, 0) || math.IsNaN(cd) { - return nil, fmt.Errorf("Crawl-delay invalid value '%s'", t2) - } else { - return &lineInfo{t: lCrawlDelay, k: t1, vf: cd}, nil - } - } - - // Consume t2 token - p.popToken() - return &lineInfo{t: lUnknown, k: t1}, nil -} - -func (p *parser) popToken() (tok string, ok bool) { - tok, ok = p.peekToken() - if !ok { - return - } - p.pos++ - return tok, true -} - -func (p *parser) peekToken() (tok string, ok bool) { - if p.pos >= len(p.tokens) { - return "", false - } - return p.tokens[p.pos], true -} - -func isAsterisk(r rune) bool { - return r == '*' -} diff --git a/vendor/github.com/temoto/robotstxt/robotstxt.go b/vendor/github.com/temoto/robotstxt/robotstxt.go deleted file mode 100644 index 52d3637..0000000 --- a/vendor/github.com/temoto/robotstxt/robotstxt.go +++ /dev/null @@ -1,227 +0,0 @@ -// Package robotstxt implements the robots.txt Exclusion Protocol -// as specified in http://www.robotstxt.org/wc/robots.html -// with various extensions. -package robotstxt - -// Comments explaining the logic are taken from either the Google's spec: -// https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt - -import ( - "bytes" - "errors" - "io/ioutil" - "net/http" - "regexp" - "strconv" - "strings" - "time" -) - -type RobotsData struct { - // private - groups map[string]*Group - allowAll bool - disallowAll bool - Host string - Sitemaps []string -} - -type Group struct { - rules []*rule - Agent string - CrawlDelay time.Duration -} - -type rule struct { - path string - allow bool - pattern *regexp.Regexp -} - -type ParseError struct { - Errs []error -} - -func newParseError(errs []error) *ParseError { - return &ParseError{errs} -} - -func (e ParseError) Error() string { - var b bytes.Buffer - - b.WriteString("Parse error(s): " + "\n") - for _, er := range e.Errs { - b.WriteString(er.Error() + "\n") - } - return b.String() -} - -var allowAll = &RobotsData{allowAll: true} -var disallowAll = &RobotsData{disallowAll: true} -var emptyGroup = &Group{} - -func FromStatusAndBytes(statusCode int, body []byte) (*RobotsData, error) { - switch { - case statusCode >= 200 && statusCode < 300: - return FromBytes(body) - - // From https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt - // - // Google treats all 4xx errors in the same way and assumes that no valid - // robots.txt file exists. It is assumed that there are no restrictions. - // This is a "full allow" for crawling. Note: this includes 401 - // "Unauthorized" and 403 "Forbidden" HTTP result codes. - case statusCode >= 400 && statusCode < 500: - return allowAll, nil - - // From Google's spec: - // Server errors (5xx) are seen as temporary errors that result in a "full - // disallow" of crawling. - case statusCode >= 500 && statusCode < 600: - return disallowAll, nil - } - - return nil, errors.New("Unexpected status: " + strconv.Itoa(statusCode)) -} - -func FromStatusAndString(statusCode int, body string) (*RobotsData, error) { - return FromStatusAndBytes(statusCode, []byte(body)) -} - -func FromResponse(res *http.Response) (*RobotsData, error) { - if res == nil { - // Edge case, if res is nil, return nil data - return nil, nil - } - buf, e := ioutil.ReadAll(res.Body) - if e != nil { - return nil, e - } - return FromStatusAndBytes(res.StatusCode, buf) -} - -func FromBytes(body []byte) (r *RobotsData, err error) { - var errs []error - - // special case (probably not worth optimization?) - trimmed := bytes.TrimSpace(body) - if len(trimmed) == 0 { - return allowAll, nil - } - - sc := newByteScanner("bytes", true) - //sc.Quiet = !print_errors - sc.feed(body, true) - tokens := sc.scanAll() - - // special case worth optimization - if len(tokens) == 0 { - return allowAll, nil - } - - r = &RobotsData{} - parser := newParser(tokens) - r.groups, r.Host, r.Sitemaps, errs = parser.parseAll() - if len(errs) > 0 { - return nil, newParseError(errs) - } - - return r, nil -} - -func FromString(body string) (r *RobotsData, err error) { - return FromBytes([]byte(body)) -} - -func (r *RobotsData) TestAgent(path, agent string) bool { - if r.allowAll { - return true - } - if r.disallowAll { - return false - } - - // Find a group of rules that applies to this agent - // From Google's spec: - // The user-agent is non-case-sensitive. - g := r.FindGroup(agent) - return g.Test(path) -} - -// FindGroup searches block of declarations for specified user-agent. -// From Google's spec: -// Only one group of group-member records is valid for a particular crawler. -// The crawler must determine the correct group of records by finding the group -// with the most specific user-agent that still matches. All other groups of -// records are ignored by the crawler. The user-agent is non-case-sensitive. -// The order of the groups within the robots.txt file is irrelevant. -func (r *RobotsData) FindGroup(agent string) (ret *Group) { - var prefixLen int - - agent = strings.ToLower(agent) - if ret = r.groups["*"]; ret != nil { - // Weakest match possible - prefixLen = 1 - } - for a, g := range r.groups { - if a != "*" && strings.HasPrefix(agent, a) { - if l := len(a); l > prefixLen { - prefixLen = l - ret = g - } - } - } - - if ret == nil { - return emptyGroup - } - return -} - -func (g *Group) Test(path string) bool { - if r := g.findRule(path); r != nil { - return r.allow - } - - // From Google's spec: - // By default, there are no restrictions for crawling for the designated crawlers. - return true -} - -// From Google's spec: -// The path value is used as a basis to determine whether or not a rule applies -// to a specific URL on a site. With the exception of wildcards, the path is -// used to match the beginning of a URL (and any valid URLs that start with the -// same path). -// -// At a group-member level, in particular for allow and disallow directives, -// the most specific rule based on the length of the [path] entry will trump -// the less specific (shorter) rule. The order of precedence for rules with -// wildcards is undefined. -func (g *Group) findRule(path string) (ret *rule) { - var prefixLen int - - for _, r := range g.rules { - if r.pattern != nil { - if r.pattern.MatchString(path) { - // Consider this a match equal to the length of the pattern. - // From Google's spec: - // The order of precedence for rules with wildcards is undefined. - if l := len(r.pattern.String()); l > prefixLen { - prefixLen = l - ret = r - } - } - } else if r.path == "/" && prefixLen == 0 { - // Weakest match possible - prefixLen = 1 - ret = r - } else if strings.HasPrefix(path, r.path) { - if l := len(r.path); l > prefixLen { - prefixLen = l - ret = r - } - } - } - return -} diff --git a/vendor/github.com/temoto/robotstxt/scanner.go b/vendor/github.com/temoto/robotstxt/scanner.go deleted file mode 100644 index 6bd98c2..0000000 --- a/vendor/github.com/temoto/robotstxt/scanner.go +++ /dev/null @@ -1,185 +0,0 @@ -package robotstxt - -import ( - "bytes" - "fmt" - "go/token" - "os" - "sync" - "unicode/utf8" -) - -type byteScanner struct { - pos token.Position - buf []byte - ErrorCount int - ch rune - Quiet bool - keyTokenFound bool - lastChunk bool -} - -const tokEOL = "\n" - -var WhitespaceChars = []rune{' ', '\t', '\v'} -var tokBuffers = sync.Pool{New: func() interface{} { return bytes.NewBuffer(make([]byte, 32)) }} - -func newByteScanner(srcname string, quiet bool) *byteScanner { - return &byteScanner{ - Quiet: quiet, - ch: -1, - pos: token.Position{Filename: srcname}, - } -} - -func (s *byteScanner) feed(input []byte, end bool) { - s.buf = input - s.pos.Offset = 0 - s.pos.Line = 1 - s.pos.Column = 1 - s.lastChunk = end - - // Read first char into look-ahead buffer `s.ch`. - if !s.nextChar() { - return - } - - // Skip UTF-8 byte order mark - if s.ch == 65279 { - s.nextChar() - s.pos.Column = 1 - } -} - -func (s *byteScanner) GetPosition() token.Position { - return s.pos -} - -func (s *byteScanner) scan() string { - // Note Offset > len, not >=, so we can scan last character. - if s.lastChunk && s.pos.Offset > len(s.buf) { - return "" - } - - s.skipSpace() - - if s.ch == -1 { - return "" - } - - // EOL - if s.isEol() { - s.keyTokenFound = false - // skip subsequent newline chars - for s.ch != -1 && s.isEol() { - s.nextChar() - } - // emit newline as separate token - return tokEOL - } - - // skip comments - if s.ch == '#' { - s.keyTokenFound = false - s.skipUntilEol() - if s.ch == -1 { - return "" - } - // emit newline as separate token - return tokEOL - } - - // else we found something - tok := tokBuffers.Get().(*bytes.Buffer) - defer tokBuffers.Put(tok) - tok.Reset() - tok.WriteRune(s.ch) - s.nextChar() - for s.ch != -1 && !s.isSpace() && !s.isEol() { - // Do not consider ":" to be a token separator if a first key token - // has already been found on this line (avoid cutting an absolute URL - // after the "http:") - if s.ch == ':' && !s.keyTokenFound { - s.nextChar() - s.keyTokenFound = true - break - } - - tok.WriteRune(s.ch) - s.nextChar() - } - return tok.String() -} - -func (s *byteScanner) scanAll() []string { - results := make([]string, 0, 64) // random guess of average tokens length - for { - token := s.scan() - if token != "" { - results = append(results, token) - } else { - break - } - } - return results -} - -func (s *byteScanner) error(pos token.Position, msg string) { - s.ErrorCount++ - if !s.Quiet { - fmt.Fprintf(os.Stderr, "robotstxt from %s: %s\n", pos.String(), msg) - } -} - -func (s *byteScanner) isEol() bool { - return s.ch == '\n' || s.ch == '\r' -} - -func (s *byteScanner) isSpace() bool { - for _, r := range WhitespaceChars { - if s.ch == r { - return true - } - } - return false -} - -func (s *byteScanner) skipSpace() { - for s.ch != -1 && s.isSpace() { - s.nextChar() - } -} - -func (s *byteScanner) skipUntilEol() { - for s.ch != -1 && !s.isEol() { - s.nextChar() - } - // skip subsequent newline chars - for s.ch != -1 && s.isEol() { - s.nextChar() - } -} - -// Reads next Unicode char. -func (s *byteScanner) nextChar() bool { - if s.pos.Offset >= len(s.buf) { - s.ch = -1 - return false - } - s.pos.Column++ - if s.ch == '\n' { - s.pos.Line++ - s.pos.Column = 1 - } - r, w := rune(s.buf[s.pos.Offset]), 1 - if r >= 0x80 { - r, w = utf8.DecodeRune(s.buf[s.pos.Offset:]) - if r == utf8.RuneError && w == 1 { - s.error(s.pos, "illegal UTF-8 encoding") - } - } - s.pos.Column++ - s.pos.Offset += w - s.ch = r - return true -} diff --git a/vendor/go.temporal.io/sdk/testsuite/BUILD.bazel b/vendor/go.temporal.io/sdk/testsuite/BUILD.bazel deleted file mode 100644 index 80d144d..0000000 --- a/vendor/go.temporal.io/sdk/testsuite/BUILD.bazel +++ /dev/null @@ -1,10 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "testsuite", - srcs = ["testsuite.go"], - importmap = "peridot.resf.org/vendor/go.temporal.io/sdk/testsuite", - importpath = "go.temporal.io/sdk/testsuite", - visibility = ["//visibility:public"], - deps = ["//vendor/go.temporal.io/sdk/internal"], -) diff --git a/vendor/go.temporal.io/sdk/testsuite/testsuite.go b/vendor/go.temporal.io/sdk/testsuite/testsuite.go deleted file mode 100644 index b130233..0000000 --- a/vendor/go.temporal.io/sdk/testsuite/testsuite.go +++ /dev/null @@ -1,47 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -// Package testsuite contains unit testing framework for Temporal workflows and activities. -package testsuite - -import ( - "go.temporal.io/sdk/internal" -) - -type ( - // WorkflowTestSuite is the test suite to run unit tests for workflow/activity. - WorkflowTestSuite = internal.WorkflowTestSuite - - // TestWorkflowEnvironment is the environment that you use to test workflow - TestWorkflowEnvironment = internal.TestWorkflowEnvironment - - // TestActivityEnvironment is the environment that you use to test activity - TestActivityEnvironment = internal.TestActivityEnvironment - - // MockCallWrapper is a wrapper to mock.Call. It offers the ability to wait on workflow's clock instead of wall clock. - MockCallWrapper = internal.MockCallWrapper -) - -// ErrMockStartChildWorkflowFailed is special error used to indicate the mocked child workflow should fail to start. -var ErrMockStartChildWorkflowFailed = internal.ErrMockStartChildWorkflowFailed diff --git a/vendor/golang.org/x/net/html/atom/atom.go b/vendor/golang.org/x/net/html/atom/atom.go deleted file mode 100644 index cd0a8ac..0000000 --- a/vendor/golang.org/x/net/html/atom/atom.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package atom provides integer codes (also known as atoms) for a fixed set of -// frequently occurring HTML strings: tag names and attribute keys such as "p" -// and "id". -// -// Sharing an atom's name between all elements with the same tag can result in -// fewer string allocations when tokenizing and parsing HTML. Integer -// comparisons are also generally faster than string comparisons. -// -// The value of an atom's particular code is not guaranteed to stay the same -// between versions of this package. Neither is any ordering guaranteed: -// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to -// be dense. The only guarantees are that e.g. looking up "div" will yield -// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0. -package atom // import "golang.org/x/net/html/atom" - -// Atom is an integer code for a string. The zero value maps to "". -type Atom uint32 - -// String returns the atom's name. -func (a Atom) String() string { - start := uint32(a >> 8) - n := uint32(a & 0xff) - if start+n > uint32(len(atomText)) { - return "" - } - return atomText[start : start+n] -} - -func (a Atom) string() string { - return atomText[a>>8 : a>>8+a&0xff] -} - -// fnv computes the FNV hash with an arbitrary starting value h. -func fnv(h uint32, s []byte) uint32 { - for i := range s { - h ^= uint32(s[i]) - h *= 16777619 - } - return h -} - -func match(s string, t []byte) bool { - for i, c := range t { - if s[i] != c { - return false - } - } - return true -} - -// Lookup returns the atom whose name is s. It returns zero if there is no -// such atom. The lookup is case sensitive. -func Lookup(s []byte) Atom { - if len(s) == 0 || len(s) > maxAtomLen { - return 0 - } - h := fnv(hash0, s) - if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { - return a - } - if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { - return a - } - return 0 -} - -// String returns a string whose contents are equal to s. In that sense, it is -// equivalent to string(s) but may be more efficient. -func String(s []byte) string { - if a := Lookup(s); a != 0 { - return a.String() - } - return string(s) -} diff --git a/vendor/golang.org/x/net/html/atom/table.go b/vendor/golang.org/x/net/html/atom/table.go deleted file mode 100644 index 2a93886..0000000 --- a/vendor/golang.org/x/net/html/atom/table.go +++ /dev/null @@ -1,783 +0,0 @@ -// Code generated by go generate gen.go; DO NOT EDIT. - -//go:generate go run gen.go - -package atom - -const ( - A Atom = 0x1 - Abbr Atom = 0x4 - Accept Atom = 0x1a06 - AcceptCharset Atom = 0x1a0e - Accesskey Atom = 0x2c09 - Acronym Atom = 0xaa07 - Action Atom = 0x27206 - Address Atom = 0x6f307 - Align Atom = 0xb105 - Allowfullscreen Atom = 0x2080f - Allowpaymentrequest Atom = 0xc113 - Allowusermedia Atom = 0xdd0e - Alt Atom = 0xf303 - Annotation Atom = 0x1c90a - AnnotationXml Atom = 0x1c90e - Applet Atom = 0x31906 - Area Atom = 0x35604 - Article Atom = 0x3fc07 - As Atom = 0x3c02 - Aside Atom = 0x10705 - Async Atom = 0xff05 - Audio Atom = 0x11505 - Autocomplete Atom = 0x2780c - Autofocus Atom = 0x12109 - Autoplay Atom = 0x13c08 - B Atom = 0x101 - Base Atom = 0x3b04 - Basefont Atom = 0x3b08 - Bdi Atom = 0xba03 - Bdo Atom = 0x14b03 - Bgsound Atom = 0x15e07 - Big Atom = 0x17003 - Blink Atom = 0x17305 - Blockquote Atom = 0x1870a - Body Atom = 0x2804 - Br Atom = 0x202 - Button Atom = 0x19106 - Canvas Atom = 0x10306 - Caption Atom = 0x23107 - Center Atom = 0x22006 - Challenge Atom = 0x29b09 - Charset Atom = 0x2107 - Checked Atom = 0x47907 - Cite Atom = 0x19c04 - Class Atom = 0x56405 - Code Atom = 0x5c504 - Col Atom = 0x1ab03 - Colgroup Atom = 0x1ab08 - Color Atom = 0x1bf05 - Cols Atom = 0x1c404 - Colspan Atom = 0x1c407 - Command Atom = 0x1d707 - Content Atom = 0x58b07 - Contenteditable Atom = 0x58b0f - Contextmenu Atom = 0x3800b - Controls Atom = 0x1de08 - Coords Atom = 0x1ea06 - Crossorigin Atom = 0x1fb0b - Data Atom = 0x4a504 - Datalist Atom = 0x4a508 - Datetime Atom = 0x2b808 - Dd Atom = 0x2d702 - Default Atom = 0x10a07 - Defer Atom = 0x5c705 - Del Atom = 0x45203 - Desc Atom = 0x56104 - Details Atom = 0x7207 - Dfn Atom = 0x8703 - Dialog Atom = 0xbb06 - Dir Atom = 0x9303 - Dirname Atom = 0x9307 - Disabled Atom = 0x16408 - Div Atom = 0x16b03 - Dl Atom = 0x5e602 - Download Atom = 0x46308 - Draggable Atom = 0x17a09 - Dropzone Atom = 0x40508 - Dt Atom = 0x64b02 - Em Atom = 0x6e02 - Embed Atom = 0x6e05 - Enctype Atom = 0x28d07 - Face Atom = 0x21e04 - Fieldset Atom = 0x22608 - Figcaption Atom = 0x22e0a - Figure Atom = 0x24806 - Font Atom = 0x3f04 - Footer Atom = 0xf606 - For Atom = 0x25403 - ForeignObject Atom = 0x2540d - Foreignobject Atom = 0x2610d - Form Atom = 0x26e04 - Formaction Atom = 0x26e0a - Formenctype Atom = 0x2890b - Formmethod Atom = 0x2a40a - Formnovalidate Atom = 0x2ae0e - Formtarget Atom = 0x2c00a - Frame Atom = 0x8b05 - Frameset Atom = 0x8b08 - H1 Atom = 0x15c02 - H2 Atom = 0x2de02 - H3 Atom = 0x30d02 - H4 Atom = 0x34502 - H5 Atom = 0x34f02 - H6 Atom = 0x64d02 - Head Atom = 0x33104 - Header Atom = 0x33106 - Headers Atom = 0x33107 - Height Atom = 0x5206 - Hgroup Atom = 0x2ca06 - Hidden Atom = 0x2d506 - High Atom = 0x2db04 - Hr Atom = 0x15702 - Href Atom = 0x2e004 - Hreflang Atom = 0x2e008 - Html Atom = 0x5604 - HttpEquiv Atom = 0x2e80a - I Atom = 0x601 - Icon Atom = 0x58a04 - Id Atom = 0x10902 - Iframe Atom = 0x2fc06 - Image Atom = 0x30205 - Img Atom = 0x30703 - Input Atom = 0x44b05 - Inputmode Atom = 0x44b09 - Ins Atom = 0x20403 - Integrity Atom = 0x23f09 - Is Atom = 0x16502 - Isindex Atom = 0x30f07 - Ismap Atom = 0x31605 - Itemid Atom = 0x38b06 - Itemprop Atom = 0x19d08 - Itemref Atom = 0x3cd07 - Itemscope Atom = 0x67109 - Itemtype Atom = 0x31f08 - Kbd Atom = 0xb903 - Keygen Atom = 0x3206 - Keytype Atom = 0xd607 - Kind Atom = 0x17704 - Label Atom = 0x5905 - Lang Atom = 0x2e404 - Legend Atom = 0x18106 - Li Atom = 0xb202 - Link Atom = 0x17404 - List Atom = 0x4a904 - Listing Atom = 0x4a907 - Loop Atom = 0x5d04 - Low Atom = 0xc303 - Main Atom = 0x1004 - Malignmark Atom = 0xb00a - Manifest Atom = 0x6d708 - Map Atom = 0x31803 - Mark Atom = 0xb604 - Marquee Atom = 0x32707 - Math Atom = 0x32e04 - Max Atom = 0x33d03 - Maxlength Atom = 0x33d09 - Media Atom = 0xe605 - Mediagroup Atom = 0xe60a - Menu Atom = 0x38704 - Menuitem Atom = 0x38708 - Meta Atom = 0x4b804 - Meter Atom = 0x9805 - Method Atom = 0x2a806 - Mglyph Atom = 0x30806 - Mi Atom = 0x34702 - Min Atom = 0x34703 - Minlength Atom = 0x34709 - Mn Atom = 0x2b102 - Mo Atom = 0xa402 - Ms Atom = 0x67402 - Mtext Atom = 0x35105 - Multiple Atom = 0x35f08 - Muted Atom = 0x36705 - Name Atom = 0x9604 - Nav Atom = 0x1303 - Nobr Atom = 0x3704 - Noembed Atom = 0x6c07 - Noframes Atom = 0x8908 - Nomodule Atom = 0xa208 - Nonce Atom = 0x1a605 - Noscript Atom = 0x21608 - Novalidate Atom = 0x2b20a - Object Atom = 0x26806 - Ol Atom = 0x13702 - Onabort Atom = 0x19507 - Onafterprint Atom = 0x2360c - Onautocomplete Atom = 0x2760e - Onautocompleteerror Atom = 0x27613 - Onauxclick Atom = 0x61f0a - Onbeforeprint Atom = 0x69e0d - Onbeforeunload Atom = 0x6e70e - Onblur Atom = 0x56d06 - Oncancel Atom = 0x11908 - Oncanplay Atom = 0x14d09 - Oncanplaythrough Atom = 0x14d10 - Onchange Atom = 0x41b08 - Onclick Atom = 0x2f507 - Onclose Atom = 0x36c07 - Oncontextmenu Atom = 0x37e0d - Oncopy Atom = 0x39106 - Oncuechange Atom = 0x3970b - Oncut Atom = 0x3a205 - Ondblclick Atom = 0x3a70a - Ondrag Atom = 0x3b106 - Ondragend Atom = 0x3b109 - Ondragenter Atom = 0x3ba0b - Ondragexit Atom = 0x3c50a - Ondragleave Atom = 0x3df0b - Ondragover Atom = 0x3ea0a - Ondragstart Atom = 0x3f40b - Ondrop Atom = 0x40306 - Ondurationchange Atom = 0x41310 - Onemptied Atom = 0x40a09 - Onended Atom = 0x42307 - Onerror Atom = 0x42a07 - Onfocus Atom = 0x43107 - Onhashchange Atom = 0x43d0c - Oninput Atom = 0x44907 - Oninvalid Atom = 0x45509 - Onkeydown Atom = 0x45e09 - Onkeypress Atom = 0x46b0a - Onkeyup Atom = 0x48007 - Onlanguagechange Atom = 0x48d10 - Onload Atom = 0x49d06 - Onloadeddata Atom = 0x49d0c - Onloadedmetadata Atom = 0x4b010 - Onloadend Atom = 0x4c609 - Onloadstart Atom = 0x4cf0b - Onmessage Atom = 0x4da09 - Onmessageerror Atom = 0x4da0e - Onmousedown Atom = 0x4e80b - Onmouseenter Atom = 0x4f30c - Onmouseleave Atom = 0x4ff0c - Onmousemove Atom = 0x50b0b - Onmouseout Atom = 0x5160a - Onmouseover Atom = 0x5230b - Onmouseup Atom = 0x52e09 - Onmousewheel Atom = 0x53c0c - Onoffline Atom = 0x54809 - Ononline Atom = 0x55108 - Onpagehide Atom = 0x5590a - Onpageshow Atom = 0x5730a - Onpaste Atom = 0x57f07 - Onpause Atom = 0x59a07 - Onplay Atom = 0x5a406 - Onplaying Atom = 0x5a409 - Onpopstate Atom = 0x5ad0a - Onprogress Atom = 0x5b70a - Onratechange Atom = 0x5cc0c - Onrejectionhandled Atom = 0x5d812 - Onreset Atom = 0x5ea07 - Onresize Atom = 0x5f108 - Onscroll Atom = 0x60008 - Onsecuritypolicyviolation Atom = 0x60819 - Onseeked Atom = 0x62908 - Onseeking Atom = 0x63109 - Onselect Atom = 0x63a08 - Onshow Atom = 0x64406 - Onsort Atom = 0x64f06 - Onstalled Atom = 0x65909 - Onstorage Atom = 0x66209 - Onsubmit Atom = 0x66b08 - Onsuspend Atom = 0x67b09 - Ontimeupdate Atom = 0x400c - Ontoggle Atom = 0x68408 - Onunhandledrejection Atom = 0x68c14 - Onunload Atom = 0x6ab08 - Onvolumechange Atom = 0x6b30e - Onwaiting Atom = 0x6c109 - Onwheel Atom = 0x6ca07 - Open Atom = 0x1a304 - Optgroup Atom = 0x5f08 - Optimum Atom = 0x6d107 - Option Atom = 0x6e306 - Output Atom = 0x51d06 - P Atom = 0xc01 - Param Atom = 0xc05 - Pattern Atom = 0x6607 - Picture Atom = 0x7b07 - Ping Atom = 0xef04 - Placeholder Atom = 0x1310b - Plaintext Atom = 0x1b209 - Playsinline Atom = 0x1400b - Poster Atom = 0x2cf06 - Pre Atom = 0x47003 - Preload Atom = 0x48607 - Progress Atom = 0x5b908 - Prompt Atom = 0x53606 - Public Atom = 0x58606 - Q Atom = 0xcf01 - Radiogroup Atom = 0x30a - Rb Atom = 0x3a02 - Readonly Atom = 0x35708 - Referrerpolicy Atom = 0x3d10e - Rel Atom = 0x48703 - Required Atom = 0x24c08 - Reversed Atom = 0x8008 - Rows Atom = 0x9c04 - Rowspan Atom = 0x9c07 - Rp Atom = 0x23c02 - Rt Atom = 0x19a02 - Rtc Atom = 0x19a03 - Ruby Atom = 0xfb04 - S Atom = 0x2501 - Samp Atom = 0x7804 - Sandbox Atom = 0x12907 - Scope Atom = 0x67505 - Scoped Atom = 0x67506 - Script Atom = 0x21806 - Seamless Atom = 0x37108 - Section Atom = 0x56807 - Select Atom = 0x63c06 - Selected Atom = 0x63c08 - Shape Atom = 0x1e505 - Size Atom = 0x5f504 - Sizes Atom = 0x5f505 - Slot Atom = 0x1ef04 - Small Atom = 0x20605 - Sortable Atom = 0x65108 - Sorted Atom = 0x33706 - Source Atom = 0x37806 - Spacer Atom = 0x43706 - Span Atom = 0x9f04 - Spellcheck Atom = 0x4740a - Src Atom = 0x5c003 - Srcdoc Atom = 0x5c006 - Srclang Atom = 0x5f907 - Srcset Atom = 0x6f906 - Start Atom = 0x3fa05 - Step Atom = 0x58304 - Strike Atom = 0xd206 - Strong Atom = 0x6dd06 - Style Atom = 0x6ff05 - Sub Atom = 0x66d03 - Summary Atom = 0x70407 - Sup Atom = 0x70b03 - Svg Atom = 0x70e03 - System Atom = 0x71106 - Tabindex Atom = 0x4be08 - Table Atom = 0x59505 - Target Atom = 0x2c406 - Tbody Atom = 0x2705 - Td Atom = 0x9202 - Template Atom = 0x71408 - Textarea Atom = 0x35208 - Tfoot Atom = 0xf505 - Th Atom = 0x15602 - Thead Atom = 0x33005 - Time Atom = 0x4204 - Title Atom = 0x11005 - Tr Atom = 0xcc02 - Track Atom = 0x1ba05 - Translate Atom = 0x1f209 - Tt Atom = 0x6802 - Type Atom = 0xd904 - Typemustmatch Atom = 0x2900d - U Atom = 0xb01 - Ul Atom = 0xa702 - Updateviacache Atom = 0x460e - Usemap Atom = 0x59e06 - Value Atom = 0x1505 - Var Atom = 0x16d03 - Video Atom = 0x2f105 - Wbr Atom = 0x57c03 - Width Atom = 0x64905 - Workertype Atom = 0x71c0a - Wrap Atom = 0x72604 - Xmp Atom = 0x12f03 -) - -const hash0 = 0x81cdf10e - -const maxAtomLen = 25 - -var table = [1 << 9]Atom{ - 0x1: 0xe60a, // mediagroup - 0x2: 0x2e404, // lang - 0x4: 0x2c09, // accesskey - 0x5: 0x8b08, // frameset - 0x7: 0x63a08, // onselect - 0x8: 0x71106, // system - 0xa: 0x64905, // width - 0xc: 0x2890b, // formenctype - 0xd: 0x13702, // ol - 0xe: 0x3970b, // oncuechange - 0x10: 0x14b03, // bdo - 0x11: 0x11505, // audio - 0x12: 0x17a09, // draggable - 0x14: 0x2f105, // video - 0x15: 0x2b102, // mn - 0x16: 0x38704, // menu - 0x17: 0x2cf06, // poster - 0x19: 0xf606, // footer - 0x1a: 0x2a806, // method - 0x1b: 0x2b808, // datetime - 0x1c: 0x19507, // onabort - 0x1d: 0x460e, // updateviacache - 0x1e: 0xff05, // async - 0x1f: 0x49d06, // onload - 0x21: 0x11908, // oncancel - 0x22: 0x62908, // onseeked - 0x23: 0x30205, // image - 0x24: 0x5d812, // onrejectionhandled - 0x26: 0x17404, // link - 0x27: 0x51d06, // output - 0x28: 0x33104, // head - 0x29: 0x4ff0c, // onmouseleave - 0x2a: 0x57f07, // onpaste - 0x2b: 0x5a409, // onplaying - 0x2c: 0x1c407, // colspan - 0x2f: 0x1bf05, // color - 0x30: 0x5f504, // size - 0x31: 0x2e80a, // http-equiv - 0x33: 0x601, // i - 0x34: 0x5590a, // onpagehide - 0x35: 0x68c14, // onunhandledrejection - 0x37: 0x42a07, // onerror - 0x3a: 0x3b08, // basefont - 0x3f: 0x1303, // nav - 0x40: 0x17704, // kind - 0x41: 0x35708, // readonly - 0x42: 0x30806, // mglyph - 0x44: 0xb202, // li - 0x46: 0x2d506, // hidden - 0x47: 0x70e03, // svg - 0x48: 0x58304, // step - 0x49: 0x23f09, // integrity - 0x4a: 0x58606, // public - 0x4c: 0x1ab03, // col - 0x4d: 0x1870a, // blockquote - 0x4e: 0x34f02, // h5 - 0x50: 0x5b908, // progress - 0x51: 0x5f505, // sizes - 0x52: 0x34502, // h4 - 0x56: 0x33005, // thead - 0x57: 0xd607, // keytype - 0x58: 0x5b70a, // onprogress - 0x59: 0x44b09, // inputmode - 0x5a: 0x3b109, // ondragend - 0x5d: 0x3a205, // oncut - 0x5e: 0x43706, // spacer - 0x5f: 0x1ab08, // colgroup - 0x62: 0x16502, // is - 0x65: 0x3c02, // as - 0x66: 0x54809, // onoffline - 0x67: 0x33706, // sorted - 0x69: 0x48d10, // onlanguagechange - 0x6c: 0x43d0c, // onhashchange - 0x6d: 0x9604, // name - 0x6e: 0xf505, // tfoot - 0x6f: 0x56104, // desc - 0x70: 0x33d03, // max - 0x72: 0x1ea06, // coords - 0x73: 0x30d02, // h3 - 0x74: 0x6e70e, // onbeforeunload - 0x75: 0x9c04, // rows - 0x76: 0x63c06, // select - 0x77: 0x9805, // meter - 0x78: 0x38b06, // itemid - 0x79: 0x53c0c, // onmousewheel - 0x7a: 0x5c006, // srcdoc - 0x7d: 0x1ba05, // track - 0x7f: 0x31f08, // itemtype - 0x82: 0xa402, // mo - 0x83: 0x41b08, // onchange - 0x84: 0x33107, // headers - 0x85: 0x5cc0c, // onratechange - 0x86: 0x60819, // onsecuritypolicyviolation - 0x88: 0x4a508, // datalist - 0x89: 0x4e80b, // onmousedown - 0x8a: 0x1ef04, // slot - 0x8b: 0x4b010, // onloadedmetadata - 0x8c: 0x1a06, // accept - 0x8d: 0x26806, // object - 0x91: 0x6b30e, // onvolumechange - 0x92: 0x2107, // charset - 0x93: 0x27613, // onautocompleteerror - 0x94: 0xc113, // allowpaymentrequest - 0x95: 0x2804, // body - 0x96: 0x10a07, // default - 0x97: 0x63c08, // selected - 0x98: 0x21e04, // face - 0x99: 0x1e505, // shape - 0x9b: 0x68408, // ontoggle - 0x9e: 0x64b02, // dt - 0x9f: 0xb604, // mark - 0xa1: 0xb01, // u - 0xa4: 0x6ab08, // onunload - 0xa5: 0x5d04, // loop - 0xa6: 0x16408, // disabled - 0xaa: 0x42307, // onended - 0xab: 0xb00a, // malignmark - 0xad: 0x67b09, // onsuspend - 0xae: 0x35105, // mtext - 0xaf: 0x64f06, // onsort - 0xb0: 0x19d08, // itemprop - 0xb3: 0x67109, // itemscope - 0xb4: 0x17305, // blink - 0xb6: 0x3b106, // ondrag - 0xb7: 0xa702, // ul - 0xb8: 0x26e04, // form - 0xb9: 0x12907, // sandbox - 0xba: 0x8b05, // frame - 0xbb: 0x1505, // value - 0xbc: 0x66209, // onstorage - 0xbf: 0xaa07, // acronym - 0xc0: 0x19a02, // rt - 0xc2: 0x202, // br - 0xc3: 0x22608, // fieldset - 0xc4: 0x2900d, // typemustmatch - 0xc5: 0xa208, // nomodule - 0xc6: 0x6c07, // noembed - 0xc7: 0x69e0d, // onbeforeprint - 0xc8: 0x19106, // button - 0xc9: 0x2f507, // onclick - 0xca: 0x70407, // summary - 0xcd: 0xfb04, // ruby - 0xce: 0x56405, // class - 0xcf: 0x3f40b, // ondragstart - 0xd0: 0x23107, // caption - 0xd4: 0xdd0e, // allowusermedia - 0xd5: 0x4cf0b, // onloadstart - 0xd9: 0x16b03, // div - 0xda: 0x4a904, // list - 0xdb: 0x32e04, // math - 0xdc: 0x44b05, // input - 0xdf: 0x3ea0a, // ondragover - 0xe0: 0x2de02, // h2 - 0xe2: 0x1b209, // plaintext - 0xe4: 0x4f30c, // onmouseenter - 0xe7: 0x47907, // checked - 0xe8: 0x47003, // pre - 0xea: 0x35f08, // multiple - 0xeb: 0xba03, // bdi - 0xec: 0x33d09, // maxlength - 0xed: 0xcf01, // q - 0xee: 0x61f0a, // onauxclick - 0xf0: 0x57c03, // wbr - 0xf2: 0x3b04, // base - 0xf3: 0x6e306, // option - 0xf5: 0x41310, // ondurationchange - 0xf7: 0x8908, // noframes - 0xf9: 0x40508, // dropzone - 0xfb: 0x67505, // scope - 0xfc: 0x8008, // reversed - 0xfd: 0x3ba0b, // ondragenter - 0xfe: 0x3fa05, // start - 0xff: 0x12f03, // xmp - 0x100: 0x5f907, // srclang - 0x101: 0x30703, // img - 0x104: 0x101, // b - 0x105: 0x25403, // for - 0x106: 0x10705, // aside - 0x107: 0x44907, // oninput - 0x108: 0x35604, // area - 0x109: 0x2a40a, // formmethod - 0x10a: 0x72604, // wrap - 0x10c: 0x23c02, // rp - 0x10d: 0x46b0a, // onkeypress - 0x10e: 0x6802, // tt - 0x110: 0x34702, // mi - 0x111: 0x36705, // muted - 0x112: 0xf303, // alt - 0x113: 0x5c504, // code - 0x114: 0x6e02, // em - 0x115: 0x3c50a, // ondragexit - 0x117: 0x9f04, // span - 0x119: 0x6d708, // manifest - 0x11a: 0x38708, // menuitem - 0x11b: 0x58b07, // content - 0x11d: 0x6c109, // onwaiting - 0x11f: 0x4c609, // onloadend - 0x121: 0x37e0d, // oncontextmenu - 0x123: 0x56d06, // onblur - 0x124: 0x3fc07, // article - 0x125: 0x9303, // dir - 0x126: 0xef04, // ping - 0x127: 0x24c08, // required - 0x128: 0x45509, // oninvalid - 0x129: 0xb105, // align - 0x12b: 0x58a04, // icon - 0x12c: 0x64d02, // h6 - 0x12d: 0x1c404, // cols - 0x12e: 0x22e0a, // figcaption - 0x12f: 0x45e09, // onkeydown - 0x130: 0x66b08, // onsubmit - 0x131: 0x14d09, // oncanplay - 0x132: 0x70b03, // sup - 0x133: 0xc01, // p - 0x135: 0x40a09, // onemptied - 0x136: 0x39106, // oncopy - 0x137: 0x19c04, // cite - 0x138: 0x3a70a, // ondblclick - 0x13a: 0x50b0b, // onmousemove - 0x13c: 0x66d03, // sub - 0x13d: 0x48703, // rel - 0x13e: 0x5f08, // optgroup - 0x142: 0x9c07, // rowspan - 0x143: 0x37806, // source - 0x144: 0x21608, // noscript - 0x145: 0x1a304, // open - 0x146: 0x20403, // ins - 0x147: 0x2540d, // foreignObject - 0x148: 0x5ad0a, // onpopstate - 0x14a: 0x28d07, // enctype - 0x14b: 0x2760e, // onautocomplete - 0x14c: 0x35208, // textarea - 0x14e: 0x2780c, // autocomplete - 0x14f: 0x15702, // hr - 0x150: 0x1de08, // controls - 0x151: 0x10902, // id - 0x153: 0x2360c, // onafterprint - 0x155: 0x2610d, // foreignobject - 0x156: 0x32707, // marquee - 0x157: 0x59a07, // onpause - 0x158: 0x5e602, // dl - 0x159: 0x5206, // height - 0x15a: 0x34703, // min - 0x15b: 0x9307, // dirname - 0x15c: 0x1f209, // translate - 0x15d: 0x5604, // html - 0x15e: 0x34709, // minlength - 0x15f: 0x48607, // preload - 0x160: 0x71408, // template - 0x161: 0x3df0b, // ondragleave - 0x162: 0x3a02, // rb - 0x164: 0x5c003, // src - 0x165: 0x6dd06, // strong - 0x167: 0x7804, // samp - 0x168: 0x6f307, // address - 0x169: 0x55108, // ononline - 0x16b: 0x1310b, // placeholder - 0x16c: 0x2c406, // target - 0x16d: 0x20605, // small - 0x16e: 0x6ca07, // onwheel - 0x16f: 0x1c90a, // annotation - 0x170: 0x4740a, // spellcheck - 0x171: 0x7207, // details - 0x172: 0x10306, // canvas - 0x173: 0x12109, // autofocus - 0x174: 0xc05, // param - 0x176: 0x46308, // download - 0x177: 0x45203, // del - 0x178: 0x36c07, // onclose - 0x179: 0xb903, // kbd - 0x17a: 0x31906, // applet - 0x17b: 0x2e004, // href - 0x17c: 0x5f108, // onresize - 0x17e: 0x49d0c, // onloadeddata - 0x180: 0xcc02, // tr - 0x181: 0x2c00a, // formtarget - 0x182: 0x11005, // title - 0x183: 0x6ff05, // style - 0x184: 0xd206, // strike - 0x185: 0x59e06, // usemap - 0x186: 0x2fc06, // iframe - 0x187: 0x1004, // main - 0x189: 0x7b07, // picture - 0x18c: 0x31605, // ismap - 0x18e: 0x4a504, // data - 0x18f: 0x5905, // label - 0x191: 0x3d10e, // referrerpolicy - 0x192: 0x15602, // th - 0x194: 0x53606, // prompt - 0x195: 0x56807, // section - 0x197: 0x6d107, // optimum - 0x198: 0x2db04, // high - 0x199: 0x15c02, // h1 - 0x19a: 0x65909, // onstalled - 0x19b: 0x16d03, // var - 0x19c: 0x4204, // time - 0x19e: 0x67402, // ms - 0x19f: 0x33106, // header - 0x1a0: 0x4da09, // onmessage - 0x1a1: 0x1a605, // nonce - 0x1a2: 0x26e0a, // formaction - 0x1a3: 0x22006, // center - 0x1a4: 0x3704, // nobr - 0x1a5: 0x59505, // table - 0x1a6: 0x4a907, // listing - 0x1a7: 0x18106, // legend - 0x1a9: 0x29b09, // challenge - 0x1aa: 0x24806, // figure - 0x1ab: 0xe605, // media - 0x1ae: 0xd904, // type - 0x1af: 0x3f04, // font - 0x1b0: 0x4da0e, // onmessageerror - 0x1b1: 0x37108, // seamless - 0x1b2: 0x8703, // dfn - 0x1b3: 0x5c705, // defer - 0x1b4: 0xc303, // low - 0x1b5: 0x19a03, // rtc - 0x1b6: 0x5230b, // onmouseover - 0x1b7: 0x2b20a, // novalidate - 0x1b8: 0x71c0a, // workertype - 0x1ba: 0x3cd07, // itemref - 0x1bd: 0x1, // a - 0x1be: 0x31803, // map - 0x1bf: 0x400c, // ontimeupdate - 0x1c0: 0x15e07, // bgsound - 0x1c1: 0x3206, // keygen - 0x1c2: 0x2705, // tbody - 0x1c5: 0x64406, // onshow - 0x1c7: 0x2501, // s - 0x1c8: 0x6607, // pattern - 0x1cc: 0x14d10, // oncanplaythrough - 0x1ce: 0x2d702, // dd - 0x1cf: 0x6f906, // srcset - 0x1d0: 0x17003, // big - 0x1d2: 0x65108, // sortable - 0x1d3: 0x48007, // onkeyup - 0x1d5: 0x5a406, // onplay - 0x1d7: 0x4b804, // meta - 0x1d8: 0x40306, // ondrop - 0x1da: 0x60008, // onscroll - 0x1db: 0x1fb0b, // crossorigin - 0x1dc: 0x5730a, // onpageshow - 0x1dd: 0x4, // abbr - 0x1de: 0x9202, // td - 0x1df: 0x58b0f, // contenteditable - 0x1e0: 0x27206, // action - 0x1e1: 0x1400b, // playsinline - 0x1e2: 0x43107, // onfocus - 0x1e3: 0x2e008, // hreflang - 0x1e5: 0x5160a, // onmouseout - 0x1e6: 0x5ea07, // onreset - 0x1e7: 0x13c08, // autoplay - 0x1e8: 0x63109, // onseeking - 0x1ea: 0x67506, // scoped - 0x1ec: 0x30a, // radiogroup - 0x1ee: 0x3800b, // contextmenu - 0x1ef: 0x52e09, // onmouseup - 0x1f1: 0x2ca06, // hgroup - 0x1f2: 0x2080f, // allowfullscreen - 0x1f3: 0x4be08, // tabindex - 0x1f6: 0x30f07, // isindex - 0x1f7: 0x1a0e, // accept-charset - 0x1f8: 0x2ae0e, // formnovalidate - 0x1fb: 0x1c90e, // annotation-xml - 0x1fc: 0x6e05, // embed - 0x1fd: 0x21806, // script - 0x1fe: 0xbb06, // dialog - 0x1ff: 0x1d707, // command -} - -const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" + - "asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" + - "sampictureversedfnoframesetdirnameterowspanomoduleacronymali" + - "gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" + - "ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" + - "dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" + - "bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" + - "penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" + - "ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" + - "ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" + - "ignObjectforeignobjectformactionautocompleteerrorformenctype" + - "mustmatchallengeformmethodformnovalidatetimeformtargethgroup" + - "osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" + - "ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" + - "inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" + - "extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" + - "enterondragexitemreferrerpolicyondragleaveondragoverondragst" + - "articleondropzonemptiedondurationchangeonendedonerroronfocus" + - "paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" + - "spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" + - "onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" + - "usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" + - "seoveronmouseupromptonmousewheelonofflineononlineonpagehides" + - "classectionbluronpageshowbronpastepublicontenteditableonpaus" + - "emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" + - "jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" + - "violationauxclickonseekedonseekingonselectedonshowidth6onsor" + - "tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" + - "handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" + - "wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" + - "arysupsvgsystemplateworkertypewrap" diff --git a/vendor/golang.org/x/net/html/charset/charset.go b/vendor/golang.org/x/net/html/charset/charset.go deleted file mode 100644 index 13bed15..0000000 --- a/vendor/golang.org/x/net/html/charset/charset.go +++ /dev/null @@ -1,257 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package charset provides common text encodings for HTML documents. -// -// The mapping from encoding labels to encodings is defined at -// https://encoding.spec.whatwg.org/. -package charset // import "golang.org/x/net/html/charset" - -import ( - "bytes" - "fmt" - "io" - "mime" - "strings" - "unicode/utf8" - - "golang.org/x/net/html" - "golang.org/x/text/encoding" - "golang.org/x/text/encoding/charmap" - "golang.org/x/text/encoding/htmlindex" - "golang.org/x/text/transform" -) - -// Lookup returns the encoding with the specified label, and its canonical -// name. It returns nil and the empty string if label is not one of the -// standard encodings for HTML. Matching is case-insensitive and ignores -// leading and trailing whitespace. Encoders will use HTML escape sequences for -// runes that are not supported by the character set. -func Lookup(label string) (e encoding.Encoding, name string) { - e, err := htmlindex.Get(label) - if err != nil { - return nil, "" - } - name, _ = htmlindex.Name(e) - return &htmlEncoding{e}, name -} - -type htmlEncoding struct{ encoding.Encoding } - -func (h *htmlEncoding) NewEncoder() *encoding.Encoder { - // HTML requires a non-terminating legacy encoder. We use HTML escapes to - // substitute unsupported code points. - return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder()) -} - -// DetermineEncoding determines the encoding of an HTML document by examining -// up to the first 1024 bytes of content and the declared Content-Type. -// -// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding -func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) { - if len(content) > 1024 { - content = content[:1024] - } - - for _, b := range boms { - if bytes.HasPrefix(content, b.bom) { - e, name = Lookup(b.enc) - return e, name, true - } - } - - if _, params, err := mime.ParseMediaType(contentType); err == nil { - if cs, ok := params["charset"]; ok { - if e, name = Lookup(cs); e != nil { - return e, name, true - } - } - } - - if len(content) > 0 { - e, name = prescan(content) - if e != nil { - return e, name, false - } - } - - // Try to detect UTF-8. - // First eliminate any partial rune at the end. - for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- { - b := content[i] - if b < 0x80 { - break - } - if utf8.RuneStart(b) { - content = content[:i] - break - } - } - hasHighBit := false - for _, c := range content { - if c >= 0x80 { - hasHighBit = true - break - } - } - if hasHighBit && utf8.Valid(content) { - return encoding.Nop, "utf-8", false - } - - // TODO: change default depending on user's locale? - return charmap.Windows1252, "windows-1252", false -} - -// NewReader returns an io.Reader that converts the content of r to UTF-8. -// It calls DetermineEncoding to find out what r's encoding is. -func NewReader(r io.Reader, contentType string) (io.Reader, error) { - preview := make([]byte, 1024) - n, err := io.ReadFull(r, preview) - switch { - case err == io.ErrUnexpectedEOF: - preview = preview[:n] - r = bytes.NewReader(preview) - case err != nil: - return nil, err - default: - r = io.MultiReader(bytes.NewReader(preview), r) - } - - if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop { - r = transform.NewReader(r, e.NewDecoder()) - } - return r, nil -} - -// NewReaderLabel returns a reader that converts from the specified charset to -// UTF-8. It uses Lookup to find the encoding that corresponds to label, and -// returns an error if Lookup returns nil. It is suitable for use as -// encoding/xml.Decoder's CharsetReader function. -func NewReaderLabel(label string, input io.Reader) (io.Reader, error) { - e, _ := Lookup(label) - if e == nil { - return nil, fmt.Errorf("unsupported charset: %q", label) - } - return transform.NewReader(input, e.NewDecoder()), nil -} - -func prescan(content []byte) (e encoding.Encoding, name string) { - z := html.NewTokenizer(bytes.NewReader(content)) - for { - switch z.Next() { - case html.ErrorToken: - return nil, "" - - case html.StartTagToken, html.SelfClosingTagToken: - tagName, hasAttr := z.TagName() - if !bytes.Equal(tagName, []byte("meta")) { - continue - } - attrList := make(map[string]bool) - gotPragma := false - - const ( - dontKnow = iota - doNeedPragma - doNotNeedPragma - ) - needPragma := dontKnow - - name = "" - e = nil - for hasAttr { - var key, val []byte - key, val, hasAttr = z.TagAttr() - ks := string(key) - if attrList[ks] { - continue - } - attrList[ks] = true - for i, c := range val { - if 'A' <= c && c <= 'Z' { - val[i] = c + 0x20 - } - } - - switch ks { - case "http-equiv": - if bytes.Equal(val, []byte("content-type")) { - gotPragma = true - } - - case "content": - if e == nil { - name = fromMetaElement(string(val)) - if name != "" { - e, name = Lookup(name) - if e != nil { - needPragma = doNeedPragma - } - } - } - - case "charset": - e, name = Lookup(string(val)) - needPragma = doNotNeedPragma - } - } - - if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma { - continue - } - - if strings.HasPrefix(name, "utf-16") { - name = "utf-8" - e = encoding.Nop - } - - if e != nil { - return e, name - } - } - } -} - -func fromMetaElement(s string) string { - for s != "" { - csLoc := strings.Index(s, "charset") - if csLoc == -1 { - return "" - } - s = s[csLoc+len("charset"):] - s = strings.TrimLeft(s, " \t\n\f\r") - if !strings.HasPrefix(s, "=") { - continue - } - s = s[1:] - s = strings.TrimLeft(s, " \t\n\f\r") - if s == "" { - return "" - } - if q := s[0]; q == '"' || q == '\'' { - s = s[1:] - closeQuote := strings.IndexRune(s, rune(q)) - if closeQuote == -1 { - return "" - } - return s[:closeQuote] - } - - end := strings.IndexAny(s, "; \t\n\f\r") - if end == -1 { - end = len(s) - } - return s[:end] - } - return "" -} - -var boms = []struct { - bom []byte - enc string -}{ - {[]byte{0xfe, 0xff}, "utf-16be"}, - {[]byte{0xff, 0xfe}, "utf-16le"}, - {[]byte{0xef, 0xbb, 0xbf}, "utf-8"}, -} diff --git a/vendor/golang.org/x/net/html/const.go b/vendor/golang.org/x/net/html/const.go deleted file mode 100644 index ff7acf2..0000000 --- a/vendor/golang.org/x/net/html/const.go +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -// Section 12.2.4.2 of the HTML5 specification says "The following elements -// have varying levels of special parsing rules". -// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements -var isSpecialElementMap = map[string]bool{ - "address": true, - "applet": true, - "area": true, - "article": true, - "aside": true, - "base": true, - "basefont": true, - "bgsound": true, - "blockquote": true, - "body": true, - "br": true, - "button": true, - "caption": true, - "center": true, - "col": true, - "colgroup": true, - "dd": true, - "details": true, - "dir": true, - "div": true, - "dl": true, - "dt": true, - "embed": true, - "fieldset": true, - "figcaption": true, - "figure": true, - "footer": true, - "form": true, - "frame": true, - "frameset": true, - "h1": true, - "h2": true, - "h3": true, - "h4": true, - "h5": true, - "h6": true, - "head": true, - "header": true, - "hgroup": true, - "hr": true, - "html": true, - "iframe": true, - "img": true, - "input": true, - "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility. - "li": true, - "link": true, - "listing": true, - "main": true, - "marquee": true, - "menu": true, - "meta": true, - "nav": true, - "noembed": true, - "noframes": true, - "noscript": true, - "object": true, - "ol": true, - "p": true, - "param": true, - "plaintext": true, - "pre": true, - "script": true, - "section": true, - "select": true, - "source": true, - "style": true, - "summary": true, - "table": true, - "tbody": true, - "td": true, - "template": true, - "textarea": true, - "tfoot": true, - "th": true, - "thead": true, - "title": true, - "tr": true, - "track": true, - "ul": true, - "wbr": true, - "xmp": true, -} - -func isSpecialElement(element *Node) bool { - switch element.Namespace { - case "", "html": - return isSpecialElementMap[element.Data] - case "math": - switch element.Data { - case "mi", "mo", "mn", "ms", "mtext", "annotation-xml": - return true - } - case "svg": - switch element.Data { - case "foreignObject", "desc", "title": - return true - } - } - return false -} diff --git a/vendor/golang.org/x/net/html/doc.go b/vendor/golang.org/x/net/html/doc.go deleted file mode 100644 index 822ed42..0000000 --- a/vendor/golang.org/x/net/html/doc.go +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* -Package html implements an HTML5-compliant tokenizer and parser. - -Tokenization is done by creating a Tokenizer for an io.Reader r. It is the -caller's responsibility to ensure that r provides UTF-8 encoded HTML. - - z := html.NewTokenizer(r) - -Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(), -which parses the next token and returns its type, or an error: - - for { - tt := z.Next() - if tt == html.ErrorToken { - // ... - return ... - } - // Process the current token. - } - -There are two APIs for retrieving the current token. The high-level API is to -call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs -allow optionally calling Raw after Next but before Token, Text, TagName, or -TagAttr. In EBNF notation, the valid call sequence per token is: - - Next {Raw} [ Token | Text | TagName {TagAttr} ] - -Token returns an independent data structure that completely describes a token. -Entities (such as "<") are unescaped, tag names and attribute keys are -lower-cased, and attributes are collected into a []Attribute. For example: - - for { - if z.Next() == html.ErrorToken { - // Returning io.EOF indicates success. - return z.Err() - } - emitToken(z.Token()) - } - -The low-level API performs fewer allocations and copies, but the contents of -the []byte values returned by Text, TagName and TagAttr may change on the next -call to Next. For example, to extract an HTML page's anchor text: - - depth := 0 - for { - tt := z.Next() - switch tt { - case html.ErrorToken: - return z.Err() - case html.TextToken: - if depth > 0 { - // emitBytes should copy the []byte it receives, - // if it doesn't process it immediately. - emitBytes(z.Text()) - } - case html.StartTagToken, html.EndTagToken: - tn, _ := z.TagName() - if len(tn) == 1 && tn[0] == 'a' { - if tt == html.StartTagToken { - depth++ - } else { - depth-- - } - } - } - } - -Parsing is done by calling Parse with an io.Reader, which returns the root of -the parse tree (the document element) as a *Node. It is the caller's -responsibility to ensure that the Reader provides UTF-8 encoded HTML. For -example, to process each anchor node in depth-first order: - - doc, err := html.Parse(r) - if err != nil { - // ... - } - var f func(*html.Node) - f = func(n *html.Node) { - if n.Type == html.ElementNode && n.Data == "a" { - // Do something with n... - } - for c := n.FirstChild; c != nil; c = c.NextSibling { - f(c) - } - } - f(doc) - -The relevant specifications include: -https://html.spec.whatwg.org/multipage/syntax.html and -https://html.spec.whatwg.org/multipage/syntax.html#tokenization -*/ -package html // import "golang.org/x/net/html" - -// The tokenization algorithm implemented by this package is not a line-by-line -// transliteration of the relatively verbose state-machine in the WHATWG -// specification. A more direct approach is used instead, where the program -// counter implies the state, such as whether it is tokenizing a tag or a text -// node. Specification compliance is verified by checking expected and actual -// outputs over a test suite rather than aiming for algorithmic fidelity. - -// TODO(nigeltao): Does a DOM API belong in this package or a separate one? -// TODO(nigeltao): How does parsing interact with a JavaScript engine? diff --git a/vendor/golang.org/x/net/html/doctype.go b/vendor/golang.org/x/net/html/doctype.go deleted file mode 100644 index c484e5a..0000000 --- a/vendor/golang.org/x/net/html/doctype.go +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "strings" -) - -// parseDoctype parses the data from a DoctypeToken into a name, -// public identifier, and system identifier. It returns a Node whose Type -// is DoctypeNode, whose Data is the name, and which has attributes -// named "system" and "public" for the two identifiers if they were present. -// quirks is whether the document should be parsed in "quirks mode". -func parseDoctype(s string) (n *Node, quirks bool) { - n = &Node{Type: DoctypeNode} - - // Find the name. - space := strings.IndexAny(s, whitespace) - if space == -1 { - space = len(s) - } - n.Data = s[:space] - // The comparison to "html" is case-sensitive. - if n.Data != "html" { - quirks = true - } - n.Data = strings.ToLower(n.Data) - s = strings.TrimLeft(s[space:], whitespace) - - if len(s) < 6 { - // It can't start with "PUBLIC" or "SYSTEM". - // Ignore the rest of the string. - return n, quirks || s != "" - } - - key := strings.ToLower(s[:6]) - s = s[6:] - for key == "public" || key == "system" { - s = strings.TrimLeft(s, whitespace) - if s == "" { - break - } - quote := s[0] - if quote != '"' && quote != '\'' { - break - } - s = s[1:] - q := strings.IndexRune(s, rune(quote)) - var id string - if q == -1 { - id = s - s = "" - } else { - id = s[:q] - s = s[q+1:] - } - n.Attr = append(n.Attr, Attribute{Key: key, Val: id}) - if key == "public" { - key = "system" - } else { - key = "" - } - } - - if key != "" || s != "" { - quirks = true - } else if len(n.Attr) > 0 { - if n.Attr[0].Key == "public" { - public := strings.ToLower(n.Attr[0].Val) - switch public { - case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html": - quirks = true - default: - for _, q := range quirkyIDs { - if strings.HasPrefix(public, q) { - quirks = true - break - } - } - } - // The following two public IDs only cause quirks mode if there is no system ID. - if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") || - strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) { - quirks = true - } - } - if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" && - strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" { - quirks = true - } - } - - return n, quirks -} - -// quirkyIDs is a list of public doctype identifiers that cause a document -// to be interpreted in quirks mode. The identifiers should be in lower case. -var quirkyIDs = []string{ - "+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//", -} diff --git a/vendor/golang.org/x/net/html/entity.go b/vendor/golang.org/x/net/html/entity.go deleted file mode 100644 index b628880..0000000 --- a/vendor/golang.org/x/net/html/entity.go +++ /dev/null @@ -1,2253 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -// All entities that do not end with ';' are 6 or fewer bytes long. -const longestEntityWithoutSemicolon = 6 - -// entity is a map from HTML entity names to their values. The semicolon matters: -// https://html.spec.whatwg.org/multipage/syntax.html#named-character-references -// lists both "amp" and "amp;" as two separate entries. -// -// Note that the HTML5 list is larger than the HTML4 list at -// http://www.w3.org/TR/html4/sgml/entities.html -var entity = map[string]rune{ - "AElig;": '\U000000C6', - "AMP;": '\U00000026', - "Aacute;": '\U000000C1', - "Abreve;": '\U00000102', - "Acirc;": '\U000000C2', - "Acy;": '\U00000410', - "Afr;": '\U0001D504', - "Agrave;": '\U000000C0', - "Alpha;": '\U00000391', - "Amacr;": '\U00000100', - "And;": '\U00002A53', - "Aogon;": '\U00000104', - "Aopf;": '\U0001D538', - "ApplyFunction;": '\U00002061', - "Aring;": '\U000000C5', - "Ascr;": '\U0001D49C', - "Assign;": '\U00002254', - "Atilde;": '\U000000C3', - "Auml;": '\U000000C4', - "Backslash;": '\U00002216', - "Barv;": '\U00002AE7', - "Barwed;": '\U00002306', - "Bcy;": '\U00000411', - "Because;": '\U00002235', - "Bernoullis;": '\U0000212C', - "Beta;": '\U00000392', - "Bfr;": '\U0001D505', - "Bopf;": '\U0001D539', - "Breve;": '\U000002D8', - "Bscr;": '\U0000212C', - "Bumpeq;": '\U0000224E', - "CHcy;": '\U00000427', - "COPY;": '\U000000A9', - "Cacute;": '\U00000106', - "Cap;": '\U000022D2', - "CapitalDifferentialD;": '\U00002145', - "Cayleys;": '\U0000212D', - "Ccaron;": '\U0000010C', - "Ccedil;": '\U000000C7', - "Ccirc;": '\U00000108', - "Cconint;": '\U00002230', - "Cdot;": '\U0000010A', - "Cedilla;": '\U000000B8', - "CenterDot;": '\U000000B7', - "Cfr;": '\U0000212D', - "Chi;": '\U000003A7', - "CircleDot;": '\U00002299', - "CircleMinus;": '\U00002296', - "CirclePlus;": '\U00002295', - "CircleTimes;": '\U00002297', - "ClockwiseContourIntegral;": '\U00002232', - "CloseCurlyDoubleQuote;": '\U0000201D', - "CloseCurlyQuote;": '\U00002019', - "Colon;": '\U00002237', - "Colone;": '\U00002A74', - "Congruent;": '\U00002261', - "Conint;": '\U0000222F', - "ContourIntegral;": '\U0000222E', - "Copf;": '\U00002102', - "Coproduct;": '\U00002210', - "CounterClockwiseContourIntegral;": '\U00002233', - "Cross;": '\U00002A2F', - "Cscr;": '\U0001D49E', - "Cup;": '\U000022D3', - "CupCap;": '\U0000224D', - "DD;": '\U00002145', - "DDotrahd;": '\U00002911', - "DJcy;": '\U00000402', - "DScy;": '\U00000405', - "DZcy;": '\U0000040F', - "Dagger;": '\U00002021', - "Darr;": '\U000021A1', - "Dashv;": '\U00002AE4', - "Dcaron;": '\U0000010E', - "Dcy;": '\U00000414', - "Del;": '\U00002207', - "Delta;": '\U00000394', - "Dfr;": '\U0001D507', - "DiacriticalAcute;": '\U000000B4', - "DiacriticalDot;": '\U000002D9', - "DiacriticalDoubleAcute;": '\U000002DD', - "DiacriticalGrave;": '\U00000060', - "DiacriticalTilde;": '\U000002DC', - "Diamond;": '\U000022C4', - "DifferentialD;": '\U00002146', - "Dopf;": '\U0001D53B', - "Dot;": '\U000000A8', - "DotDot;": '\U000020DC', - "DotEqual;": '\U00002250', - "DoubleContourIntegral;": '\U0000222F', - "DoubleDot;": '\U000000A8', - "DoubleDownArrow;": '\U000021D3', - "DoubleLeftArrow;": '\U000021D0', - "DoubleLeftRightArrow;": '\U000021D4', - "DoubleLeftTee;": '\U00002AE4', - "DoubleLongLeftArrow;": '\U000027F8', - "DoubleLongLeftRightArrow;": '\U000027FA', - "DoubleLongRightArrow;": '\U000027F9', - "DoubleRightArrow;": '\U000021D2', - "DoubleRightTee;": '\U000022A8', - "DoubleUpArrow;": '\U000021D1', - "DoubleUpDownArrow;": '\U000021D5', - "DoubleVerticalBar;": '\U00002225', - "DownArrow;": '\U00002193', - "DownArrowBar;": '\U00002913', - "DownArrowUpArrow;": '\U000021F5', - "DownBreve;": '\U00000311', - "DownLeftRightVector;": '\U00002950', - "DownLeftTeeVector;": '\U0000295E', - "DownLeftVector;": '\U000021BD', - "DownLeftVectorBar;": '\U00002956', - "DownRightTeeVector;": '\U0000295F', - "DownRightVector;": '\U000021C1', - "DownRightVectorBar;": '\U00002957', - "DownTee;": '\U000022A4', - "DownTeeArrow;": '\U000021A7', - "Downarrow;": '\U000021D3', - "Dscr;": '\U0001D49F', - "Dstrok;": '\U00000110', - "ENG;": '\U0000014A', - "ETH;": '\U000000D0', - "Eacute;": '\U000000C9', - "Ecaron;": '\U0000011A', - "Ecirc;": '\U000000CA', - "Ecy;": '\U0000042D', - "Edot;": '\U00000116', - "Efr;": '\U0001D508', - "Egrave;": '\U000000C8', - "Element;": '\U00002208', - "Emacr;": '\U00000112', - "EmptySmallSquare;": '\U000025FB', - "EmptyVerySmallSquare;": '\U000025AB', - "Eogon;": '\U00000118', - "Eopf;": '\U0001D53C', - "Epsilon;": '\U00000395', - "Equal;": '\U00002A75', - "EqualTilde;": '\U00002242', - "Equilibrium;": '\U000021CC', - "Escr;": '\U00002130', - "Esim;": '\U00002A73', - "Eta;": '\U00000397', - "Euml;": '\U000000CB', - "Exists;": '\U00002203', - "ExponentialE;": '\U00002147', - "Fcy;": '\U00000424', - "Ffr;": '\U0001D509', - "FilledSmallSquare;": '\U000025FC', - "FilledVerySmallSquare;": '\U000025AA', - "Fopf;": '\U0001D53D', - "ForAll;": '\U00002200', - "Fouriertrf;": '\U00002131', - "Fscr;": '\U00002131', - "GJcy;": '\U00000403', - "GT;": '\U0000003E', - "Gamma;": '\U00000393', - "Gammad;": '\U000003DC', - "Gbreve;": '\U0000011E', - "Gcedil;": '\U00000122', - "Gcirc;": '\U0000011C', - "Gcy;": '\U00000413', - "Gdot;": '\U00000120', - "Gfr;": '\U0001D50A', - "Gg;": '\U000022D9', - "Gopf;": '\U0001D53E', - "GreaterEqual;": '\U00002265', - "GreaterEqualLess;": '\U000022DB', - "GreaterFullEqual;": '\U00002267', - "GreaterGreater;": '\U00002AA2', - "GreaterLess;": '\U00002277', - "GreaterSlantEqual;": '\U00002A7E', - "GreaterTilde;": '\U00002273', - "Gscr;": '\U0001D4A2', - "Gt;": '\U0000226B', - "HARDcy;": '\U0000042A', - "Hacek;": '\U000002C7', - "Hat;": '\U0000005E', - "Hcirc;": '\U00000124', - "Hfr;": '\U0000210C', - "HilbertSpace;": '\U0000210B', - "Hopf;": '\U0000210D', - "HorizontalLine;": '\U00002500', - "Hscr;": '\U0000210B', - "Hstrok;": '\U00000126', - "HumpDownHump;": '\U0000224E', - "HumpEqual;": '\U0000224F', - "IEcy;": '\U00000415', - "IJlig;": '\U00000132', - "IOcy;": '\U00000401', - "Iacute;": '\U000000CD', - "Icirc;": '\U000000CE', - "Icy;": '\U00000418', - "Idot;": '\U00000130', - "Ifr;": '\U00002111', - "Igrave;": '\U000000CC', - "Im;": '\U00002111', - "Imacr;": '\U0000012A', - "ImaginaryI;": '\U00002148', - "Implies;": '\U000021D2', - "Int;": '\U0000222C', - "Integral;": '\U0000222B', - "Intersection;": '\U000022C2', - "InvisibleComma;": '\U00002063', - "InvisibleTimes;": '\U00002062', - "Iogon;": '\U0000012E', - "Iopf;": '\U0001D540', - "Iota;": '\U00000399', - "Iscr;": '\U00002110', - "Itilde;": '\U00000128', - "Iukcy;": '\U00000406', - "Iuml;": '\U000000CF', - "Jcirc;": '\U00000134', - "Jcy;": '\U00000419', - "Jfr;": '\U0001D50D', - "Jopf;": '\U0001D541', - "Jscr;": '\U0001D4A5', - "Jsercy;": '\U00000408', - "Jukcy;": '\U00000404', - "KHcy;": '\U00000425', - "KJcy;": '\U0000040C', - "Kappa;": '\U0000039A', - "Kcedil;": '\U00000136', - "Kcy;": '\U0000041A', - "Kfr;": '\U0001D50E', - "Kopf;": '\U0001D542', - "Kscr;": '\U0001D4A6', - "LJcy;": '\U00000409', - "LT;": '\U0000003C', - "Lacute;": '\U00000139', - "Lambda;": '\U0000039B', - "Lang;": '\U000027EA', - "Laplacetrf;": '\U00002112', - "Larr;": '\U0000219E', - "Lcaron;": '\U0000013D', - "Lcedil;": '\U0000013B', - "Lcy;": '\U0000041B', - "LeftAngleBracket;": '\U000027E8', - "LeftArrow;": '\U00002190', - "LeftArrowBar;": '\U000021E4', - "LeftArrowRightArrow;": '\U000021C6', - "LeftCeiling;": '\U00002308', - "LeftDoubleBracket;": '\U000027E6', - "LeftDownTeeVector;": '\U00002961', - "LeftDownVector;": '\U000021C3', - "LeftDownVectorBar;": '\U00002959', - "LeftFloor;": '\U0000230A', - "LeftRightArrow;": '\U00002194', - "LeftRightVector;": '\U0000294E', - "LeftTee;": '\U000022A3', - "LeftTeeArrow;": '\U000021A4', - "LeftTeeVector;": '\U0000295A', - "LeftTriangle;": '\U000022B2', - "LeftTriangleBar;": '\U000029CF', - "LeftTriangleEqual;": '\U000022B4', - "LeftUpDownVector;": '\U00002951', - "LeftUpTeeVector;": '\U00002960', - "LeftUpVector;": '\U000021BF', - "LeftUpVectorBar;": '\U00002958', - "LeftVector;": '\U000021BC', - "LeftVectorBar;": '\U00002952', - "Leftarrow;": '\U000021D0', - "Leftrightarrow;": '\U000021D4', - "LessEqualGreater;": '\U000022DA', - "LessFullEqual;": '\U00002266', - "LessGreater;": '\U00002276', - "LessLess;": '\U00002AA1', - "LessSlantEqual;": '\U00002A7D', - "LessTilde;": '\U00002272', - "Lfr;": '\U0001D50F', - "Ll;": '\U000022D8', - "Lleftarrow;": '\U000021DA', - "Lmidot;": '\U0000013F', - "LongLeftArrow;": '\U000027F5', - "LongLeftRightArrow;": '\U000027F7', - "LongRightArrow;": '\U000027F6', - "Longleftarrow;": '\U000027F8', - "Longleftrightarrow;": '\U000027FA', - "Longrightarrow;": '\U000027F9', - "Lopf;": '\U0001D543', - "LowerLeftArrow;": '\U00002199', - "LowerRightArrow;": '\U00002198', - "Lscr;": '\U00002112', - "Lsh;": '\U000021B0', - "Lstrok;": '\U00000141', - "Lt;": '\U0000226A', - "Map;": '\U00002905', - "Mcy;": '\U0000041C', - "MediumSpace;": '\U0000205F', - "Mellintrf;": '\U00002133', - "Mfr;": '\U0001D510', - "MinusPlus;": '\U00002213', - "Mopf;": '\U0001D544', - "Mscr;": '\U00002133', - "Mu;": '\U0000039C', - "NJcy;": '\U0000040A', - "Nacute;": '\U00000143', - "Ncaron;": '\U00000147', - "Ncedil;": '\U00000145', - "Ncy;": '\U0000041D', - "NegativeMediumSpace;": '\U0000200B', - "NegativeThickSpace;": '\U0000200B', - "NegativeThinSpace;": '\U0000200B', - "NegativeVeryThinSpace;": '\U0000200B', - "NestedGreaterGreater;": '\U0000226B', - "NestedLessLess;": '\U0000226A', - "NewLine;": '\U0000000A', - "Nfr;": '\U0001D511', - "NoBreak;": '\U00002060', - "NonBreakingSpace;": '\U000000A0', - "Nopf;": '\U00002115', - "Not;": '\U00002AEC', - "NotCongruent;": '\U00002262', - "NotCupCap;": '\U0000226D', - "NotDoubleVerticalBar;": '\U00002226', - "NotElement;": '\U00002209', - "NotEqual;": '\U00002260', - "NotExists;": '\U00002204', - "NotGreater;": '\U0000226F', - "NotGreaterEqual;": '\U00002271', - "NotGreaterLess;": '\U00002279', - "NotGreaterTilde;": '\U00002275', - "NotLeftTriangle;": '\U000022EA', - "NotLeftTriangleEqual;": '\U000022EC', - "NotLess;": '\U0000226E', - "NotLessEqual;": '\U00002270', - "NotLessGreater;": '\U00002278', - "NotLessTilde;": '\U00002274', - "NotPrecedes;": '\U00002280', - "NotPrecedesSlantEqual;": '\U000022E0', - "NotReverseElement;": '\U0000220C', - "NotRightTriangle;": '\U000022EB', - "NotRightTriangleEqual;": '\U000022ED', - "NotSquareSubsetEqual;": '\U000022E2', - "NotSquareSupersetEqual;": '\U000022E3', - "NotSubsetEqual;": '\U00002288', - "NotSucceeds;": '\U00002281', - "NotSucceedsSlantEqual;": '\U000022E1', - "NotSupersetEqual;": '\U00002289', - "NotTilde;": '\U00002241', - "NotTildeEqual;": '\U00002244', - "NotTildeFullEqual;": '\U00002247', - "NotTildeTilde;": '\U00002249', - "NotVerticalBar;": '\U00002224', - "Nscr;": '\U0001D4A9', - "Ntilde;": '\U000000D1', - "Nu;": '\U0000039D', - "OElig;": '\U00000152', - "Oacute;": '\U000000D3', - "Ocirc;": '\U000000D4', - "Ocy;": '\U0000041E', - "Odblac;": '\U00000150', - "Ofr;": '\U0001D512', - "Ograve;": '\U000000D2', - "Omacr;": '\U0000014C', - "Omega;": '\U000003A9', - "Omicron;": '\U0000039F', - "Oopf;": '\U0001D546', - "OpenCurlyDoubleQuote;": '\U0000201C', - "OpenCurlyQuote;": '\U00002018', - "Or;": '\U00002A54', - "Oscr;": '\U0001D4AA', - "Oslash;": '\U000000D8', - "Otilde;": '\U000000D5', - "Otimes;": '\U00002A37', - "Ouml;": '\U000000D6', - "OverBar;": '\U0000203E', - "OverBrace;": '\U000023DE', - "OverBracket;": '\U000023B4', - "OverParenthesis;": '\U000023DC', - "PartialD;": '\U00002202', - "Pcy;": '\U0000041F', - "Pfr;": '\U0001D513', - "Phi;": '\U000003A6', - "Pi;": '\U000003A0', - "PlusMinus;": '\U000000B1', - "Poincareplane;": '\U0000210C', - "Popf;": '\U00002119', - "Pr;": '\U00002ABB', - "Precedes;": '\U0000227A', - "PrecedesEqual;": '\U00002AAF', - "PrecedesSlantEqual;": '\U0000227C', - "PrecedesTilde;": '\U0000227E', - "Prime;": '\U00002033', - "Product;": '\U0000220F', - "Proportion;": '\U00002237', - "Proportional;": '\U0000221D', - "Pscr;": '\U0001D4AB', - "Psi;": '\U000003A8', - "QUOT;": '\U00000022', - "Qfr;": '\U0001D514', - "Qopf;": '\U0000211A', - "Qscr;": '\U0001D4AC', - "RBarr;": '\U00002910', - "REG;": '\U000000AE', - "Racute;": '\U00000154', - "Rang;": '\U000027EB', - "Rarr;": '\U000021A0', - "Rarrtl;": '\U00002916', - "Rcaron;": '\U00000158', - "Rcedil;": '\U00000156', - "Rcy;": '\U00000420', - "Re;": '\U0000211C', - "ReverseElement;": '\U0000220B', - "ReverseEquilibrium;": '\U000021CB', - "ReverseUpEquilibrium;": '\U0000296F', - "Rfr;": '\U0000211C', - "Rho;": '\U000003A1', - "RightAngleBracket;": '\U000027E9', - "RightArrow;": '\U00002192', - "RightArrowBar;": '\U000021E5', - "RightArrowLeftArrow;": '\U000021C4', - "RightCeiling;": '\U00002309', - "RightDoubleBracket;": '\U000027E7', - "RightDownTeeVector;": '\U0000295D', - "RightDownVector;": '\U000021C2', - "RightDownVectorBar;": '\U00002955', - "RightFloor;": '\U0000230B', - "RightTee;": '\U000022A2', - "RightTeeArrow;": '\U000021A6', - "RightTeeVector;": '\U0000295B', - "RightTriangle;": '\U000022B3', - "RightTriangleBar;": '\U000029D0', - "RightTriangleEqual;": '\U000022B5', - "RightUpDownVector;": '\U0000294F', - "RightUpTeeVector;": '\U0000295C', - "RightUpVector;": '\U000021BE', - "RightUpVectorBar;": '\U00002954', - "RightVector;": '\U000021C0', - "RightVectorBar;": '\U00002953', - "Rightarrow;": '\U000021D2', - "Ropf;": '\U0000211D', - "RoundImplies;": '\U00002970', - "Rrightarrow;": '\U000021DB', - "Rscr;": '\U0000211B', - "Rsh;": '\U000021B1', - "RuleDelayed;": '\U000029F4', - "SHCHcy;": '\U00000429', - "SHcy;": '\U00000428', - "SOFTcy;": '\U0000042C', - "Sacute;": '\U0000015A', - "Sc;": '\U00002ABC', - "Scaron;": '\U00000160', - "Scedil;": '\U0000015E', - "Scirc;": '\U0000015C', - "Scy;": '\U00000421', - "Sfr;": '\U0001D516', - "ShortDownArrow;": '\U00002193', - "ShortLeftArrow;": '\U00002190', - "ShortRightArrow;": '\U00002192', - "ShortUpArrow;": '\U00002191', - "Sigma;": '\U000003A3', - "SmallCircle;": '\U00002218', - "Sopf;": '\U0001D54A', - "Sqrt;": '\U0000221A', - "Square;": '\U000025A1', - "SquareIntersection;": '\U00002293', - "SquareSubset;": '\U0000228F', - "SquareSubsetEqual;": '\U00002291', - "SquareSuperset;": '\U00002290', - "SquareSupersetEqual;": '\U00002292', - "SquareUnion;": '\U00002294', - "Sscr;": '\U0001D4AE', - "Star;": '\U000022C6', - "Sub;": '\U000022D0', - "Subset;": '\U000022D0', - "SubsetEqual;": '\U00002286', - "Succeeds;": '\U0000227B', - "SucceedsEqual;": '\U00002AB0', - "SucceedsSlantEqual;": '\U0000227D', - "SucceedsTilde;": '\U0000227F', - "SuchThat;": '\U0000220B', - "Sum;": '\U00002211', - "Sup;": '\U000022D1', - "Superset;": '\U00002283', - "SupersetEqual;": '\U00002287', - "Supset;": '\U000022D1', - "THORN;": '\U000000DE', - "TRADE;": '\U00002122', - "TSHcy;": '\U0000040B', - "TScy;": '\U00000426', - "Tab;": '\U00000009', - "Tau;": '\U000003A4', - "Tcaron;": '\U00000164', - "Tcedil;": '\U00000162', - "Tcy;": '\U00000422', - "Tfr;": '\U0001D517', - "Therefore;": '\U00002234', - "Theta;": '\U00000398', - "ThinSpace;": '\U00002009', - "Tilde;": '\U0000223C', - "TildeEqual;": '\U00002243', - "TildeFullEqual;": '\U00002245', - "TildeTilde;": '\U00002248', - "Topf;": '\U0001D54B', - "TripleDot;": '\U000020DB', - "Tscr;": '\U0001D4AF', - "Tstrok;": '\U00000166', - "Uacute;": '\U000000DA', - "Uarr;": '\U0000219F', - "Uarrocir;": '\U00002949', - "Ubrcy;": '\U0000040E', - "Ubreve;": '\U0000016C', - "Ucirc;": '\U000000DB', - "Ucy;": '\U00000423', - "Udblac;": '\U00000170', - "Ufr;": '\U0001D518', - "Ugrave;": '\U000000D9', - "Umacr;": '\U0000016A', - "UnderBar;": '\U0000005F', - "UnderBrace;": '\U000023DF', - "UnderBracket;": '\U000023B5', - "UnderParenthesis;": '\U000023DD', - "Union;": '\U000022C3', - "UnionPlus;": '\U0000228E', - "Uogon;": '\U00000172', - "Uopf;": '\U0001D54C', - "UpArrow;": '\U00002191', - "UpArrowBar;": '\U00002912', - "UpArrowDownArrow;": '\U000021C5', - "UpDownArrow;": '\U00002195', - "UpEquilibrium;": '\U0000296E', - "UpTee;": '\U000022A5', - "UpTeeArrow;": '\U000021A5', - "Uparrow;": '\U000021D1', - "Updownarrow;": '\U000021D5', - "UpperLeftArrow;": '\U00002196', - "UpperRightArrow;": '\U00002197', - "Upsi;": '\U000003D2', - "Upsilon;": '\U000003A5', - "Uring;": '\U0000016E', - "Uscr;": '\U0001D4B0', - "Utilde;": '\U00000168', - "Uuml;": '\U000000DC', - "VDash;": '\U000022AB', - "Vbar;": '\U00002AEB', - "Vcy;": '\U00000412', - "Vdash;": '\U000022A9', - "Vdashl;": '\U00002AE6', - "Vee;": '\U000022C1', - "Verbar;": '\U00002016', - "Vert;": '\U00002016', - "VerticalBar;": '\U00002223', - "VerticalLine;": '\U0000007C', - "VerticalSeparator;": '\U00002758', - "VerticalTilde;": '\U00002240', - "VeryThinSpace;": '\U0000200A', - "Vfr;": '\U0001D519', - "Vopf;": '\U0001D54D', - "Vscr;": '\U0001D4B1', - "Vvdash;": '\U000022AA', - "Wcirc;": '\U00000174', - "Wedge;": '\U000022C0', - "Wfr;": '\U0001D51A', - "Wopf;": '\U0001D54E', - "Wscr;": '\U0001D4B2', - "Xfr;": '\U0001D51B', - "Xi;": '\U0000039E', - "Xopf;": '\U0001D54F', - "Xscr;": '\U0001D4B3', - "YAcy;": '\U0000042F', - "YIcy;": '\U00000407', - "YUcy;": '\U0000042E', - "Yacute;": '\U000000DD', - "Ycirc;": '\U00000176', - "Ycy;": '\U0000042B', - "Yfr;": '\U0001D51C', - "Yopf;": '\U0001D550', - "Yscr;": '\U0001D4B4', - "Yuml;": '\U00000178', - "ZHcy;": '\U00000416', - "Zacute;": '\U00000179', - "Zcaron;": '\U0000017D', - "Zcy;": '\U00000417', - "Zdot;": '\U0000017B', - "ZeroWidthSpace;": '\U0000200B', - "Zeta;": '\U00000396', - "Zfr;": '\U00002128', - "Zopf;": '\U00002124', - "Zscr;": '\U0001D4B5', - "aacute;": '\U000000E1', - "abreve;": '\U00000103', - "ac;": '\U0000223E', - "acd;": '\U0000223F', - "acirc;": '\U000000E2', - "acute;": '\U000000B4', - "acy;": '\U00000430', - "aelig;": '\U000000E6', - "af;": '\U00002061', - "afr;": '\U0001D51E', - "agrave;": '\U000000E0', - "alefsym;": '\U00002135', - "aleph;": '\U00002135', - "alpha;": '\U000003B1', - "amacr;": '\U00000101', - "amalg;": '\U00002A3F', - "amp;": '\U00000026', - "and;": '\U00002227', - "andand;": '\U00002A55', - "andd;": '\U00002A5C', - "andslope;": '\U00002A58', - "andv;": '\U00002A5A', - "ang;": '\U00002220', - "ange;": '\U000029A4', - "angle;": '\U00002220', - "angmsd;": '\U00002221', - "angmsdaa;": '\U000029A8', - "angmsdab;": '\U000029A9', - "angmsdac;": '\U000029AA', - "angmsdad;": '\U000029AB', - "angmsdae;": '\U000029AC', - "angmsdaf;": '\U000029AD', - "angmsdag;": '\U000029AE', - "angmsdah;": '\U000029AF', - "angrt;": '\U0000221F', - "angrtvb;": '\U000022BE', - "angrtvbd;": '\U0000299D', - "angsph;": '\U00002222', - "angst;": '\U000000C5', - "angzarr;": '\U0000237C', - "aogon;": '\U00000105', - "aopf;": '\U0001D552', - "ap;": '\U00002248', - "apE;": '\U00002A70', - "apacir;": '\U00002A6F', - "ape;": '\U0000224A', - "apid;": '\U0000224B', - "apos;": '\U00000027', - "approx;": '\U00002248', - "approxeq;": '\U0000224A', - "aring;": '\U000000E5', - "ascr;": '\U0001D4B6', - "ast;": '\U0000002A', - "asymp;": '\U00002248', - "asympeq;": '\U0000224D', - "atilde;": '\U000000E3', - "auml;": '\U000000E4', - "awconint;": '\U00002233', - "awint;": '\U00002A11', - "bNot;": '\U00002AED', - "backcong;": '\U0000224C', - "backepsilon;": '\U000003F6', - "backprime;": '\U00002035', - "backsim;": '\U0000223D', - "backsimeq;": '\U000022CD', - "barvee;": '\U000022BD', - "barwed;": '\U00002305', - "barwedge;": '\U00002305', - "bbrk;": '\U000023B5', - "bbrktbrk;": '\U000023B6', - "bcong;": '\U0000224C', - "bcy;": '\U00000431', - "bdquo;": '\U0000201E', - "becaus;": '\U00002235', - "because;": '\U00002235', - "bemptyv;": '\U000029B0', - "bepsi;": '\U000003F6', - "bernou;": '\U0000212C', - "beta;": '\U000003B2', - "beth;": '\U00002136', - "between;": '\U0000226C', - "bfr;": '\U0001D51F', - "bigcap;": '\U000022C2', - "bigcirc;": '\U000025EF', - "bigcup;": '\U000022C3', - "bigodot;": '\U00002A00', - "bigoplus;": '\U00002A01', - "bigotimes;": '\U00002A02', - "bigsqcup;": '\U00002A06', - "bigstar;": '\U00002605', - "bigtriangledown;": '\U000025BD', - "bigtriangleup;": '\U000025B3', - "biguplus;": '\U00002A04', - "bigvee;": '\U000022C1', - "bigwedge;": '\U000022C0', - "bkarow;": '\U0000290D', - "blacklozenge;": '\U000029EB', - "blacksquare;": '\U000025AA', - "blacktriangle;": '\U000025B4', - "blacktriangledown;": '\U000025BE', - "blacktriangleleft;": '\U000025C2', - "blacktriangleright;": '\U000025B8', - "blank;": '\U00002423', - "blk12;": '\U00002592', - "blk14;": '\U00002591', - "blk34;": '\U00002593', - "block;": '\U00002588', - "bnot;": '\U00002310', - "bopf;": '\U0001D553', - "bot;": '\U000022A5', - "bottom;": '\U000022A5', - "bowtie;": '\U000022C8', - "boxDL;": '\U00002557', - "boxDR;": '\U00002554', - "boxDl;": '\U00002556', - "boxDr;": '\U00002553', - "boxH;": '\U00002550', - "boxHD;": '\U00002566', - "boxHU;": '\U00002569', - "boxHd;": '\U00002564', - "boxHu;": '\U00002567', - "boxUL;": '\U0000255D', - "boxUR;": '\U0000255A', - "boxUl;": '\U0000255C', - "boxUr;": '\U00002559', - "boxV;": '\U00002551', - "boxVH;": '\U0000256C', - "boxVL;": '\U00002563', - "boxVR;": '\U00002560', - "boxVh;": '\U0000256B', - "boxVl;": '\U00002562', - "boxVr;": '\U0000255F', - "boxbox;": '\U000029C9', - "boxdL;": '\U00002555', - "boxdR;": '\U00002552', - "boxdl;": '\U00002510', - "boxdr;": '\U0000250C', - "boxh;": '\U00002500', - "boxhD;": '\U00002565', - "boxhU;": '\U00002568', - "boxhd;": '\U0000252C', - "boxhu;": '\U00002534', - "boxminus;": '\U0000229F', - "boxplus;": '\U0000229E', - "boxtimes;": '\U000022A0', - "boxuL;": '\U0000255B', - "boxuR;": '\U00002558', - "boxul;": '\U00002518', - "boxur;": '\U00002514', - "boxv;": '\U00002502', - "boxvH;": '\U0000256A', - "boxvL;": '\U00002561', - "boxvR;": '\U0000255E', - "boxvh;": '\U0000253C', - "boxvl;": '\U00002524', - "boxvr;": '\U0000251C', - "bprime;": '\U00002035', - "breve;": '\U000002D8', - "brvbar;": '\U000000A6', - "bscr;": '\U0001D4B7', - "bsemi;": '\U0000204F', - "bsim;": '\U0000223D', - "bsime;": '\U000022CD', - "bsol;": '\U0000005C', - "bsolb;": '\U000029C5', - "bsolhsub;": '\U000027C8', - "bull;": '\U00002022', - "bullet;": '\U00002022', - "bump;": '\U0000224E', - "bumpE;": '\U00002AAE', - "bumpe;": '\U0000224F', - "bumpeq;": '\U0000224F', - "cacute;": '\U00000107', - "cap;": '\U00002229', - "capand;": '\U00002A44', - "capbrcup;": '\U00002A49', - "capcap;": '\U00002A4B', - "capcup;": '\U00002A47', - "capdot;": '\U00002A40', - "caret;": '\U00002041', - "caron;": '\U000002C7', - "ccaps;": '\U00002A4D', - "ccaron;": '\U0000010D', - "ccedil;": '\U000000E7', - "ccirc;": '\U00000109', - "ccups;": '\U00002A4C', - "ccupssm;": '\U00002A50', - "cdot;": '\U0000010B', - "cedil;": '\U000000B8', - "cemptyv;": '\U000029B2', - "cent;": '\U000000A2', - "centerdot;": '\U000000B7', - "cfr;": '\U0001D520', - "chcy;": '\U00000447', - "check;": '\U00002713', - "checkmark;": '\U00002713', - "chi;": '\U000003C7', - "cir;": '\U000025CB', - "cirE;": '\U000029C3', - "circ;": '\U000002C6', - "circeq;": '\U00002257', - "circlearrowleft;": '\U000021BA', - "circlearrowright;": '\U000021BB', - "circledR;": '\U000000AE', - "circledS;": '\U000024C8', - "circledast;": '\U0000229B', - "circledcirc;": '\U0000229A', - "circleddash;": '\U0000229D', - "cire;": '\U00002257', - "cirfnint;": '\U00002A10', - "cirmid;": '\U00002AEF', - "cirscir;": '\U000029C2', - "clubs;": '\U00002663', - "clubsuit;": '\U00002663', - "colon;": '\U0000003A', - "colone;": '\U00002254', - "coloneq;": '\U00002254', - "comma;": '\U0000002C', - "commat;": '\U00000040', - "comp;": '\U00002201', - "compfn;": '\U00002218', - "complement;": '\U00002201', - "complexes;": '\U00002102', - "cong;": '\U00002245', - "congdot;": '\U00002A6D', - "conint;": '\U0000222E', - "copf;": '\U0001D554', - "coprod;": '\U00002210', - "copy;": '\U000000A9', - "copysr;": '\U00002117', - "crarr;": '\U000021B5', - "cross;": '\U00002717', - "cscr;": '\U0001D4B8', - "csub;": '\U00002ACF', - "csube;": '\U00002AD1', - "csup;": '\U00002AD0', - "csupe;": '\U00002AD2', - "ctdot;": '\U000022EF', - "cudarrl;": '\U00002938', - "cudarrr;": '\U00002935', - "cuepr;": '\U000022DE', - "cuesc;": '\U000022DF', - "cularr;": '\U000021B6', - "cularrp;": '\U0000293D', - "cup;": '\U0000222A', - "cupbrcap;": '\U00002A48', - "cupcap;": '\U00002A46', - "cupcup;": '\U00002A4A', - "cupdot;": '\U0000228D', - "cupor;": '\U00002A45', - "curarr;": '\U000021B7', - "curarrm;": '\U0000293C', - "curlyeqprec;": '\U000022DE', - "curlyeqsucc;": '\U000022DF', - "curlyvee;": '\U000022CE', - "curlywedge;": '\U000022CF', - "curren;": '\U000000A4', - "curvearrowleft;": '\U000021B6', - "curvearrowright;": '\U000021B7', - "cuvee;": '\U000022CE', - "cuwed;": '\U000022CF', - "cwconint;": '\U00002232', - "cwint;": '\U00002231', - "cylcty;": '\U0000232D', - "dArr;": '\U000021D3', - "dHar;": '\U00002965', - "dagger;": '\U00002020', - "daleth;": '\U00002138', - "darr;": '\U00002193', - "dash;": '\U00002010', - "dashv;": '\U000022A3', - "dbkarow;": '\U0000290F', - "dblac;": '\U000002DD', - "dcaron;": '\U0000010F', - "dcy;": '\U00000434', - "dd;": '\U00002146', - "ddagger;": '\U00002021', - "ddarr;": '\U000021CA', - "ddotseq;": '\U00002A77', - "deg;": '\U000000B0', - "delta;": '\U000003B4', - "demptyv;": '\U000029B1', - "dfisht;": '\U0000297F', - "dfr;": '\U0001D521', - "dharl;": '\U000021C3', - "dharr;": '\U000021C2', - "diam;": '\U000022C4', - "diamond;": '\U000022C4', - "diamondsuit;": '\U00002666', - "diams;": '\U00002666', - "die;": '\U000000A8', - "digamma;": '\U000003DD', - "disin;": '\U000022F2', - "div;": '\U000000F7', - "divide;": '\U000000F7', - "divideontimes;": '\U000022C7', - "divonx;": '\U000022C7', - "djcy;": '\U00000452', - "dlcorn;": '\U0000231E', - "dlcrop;": '\U0000230D', - "dollar;": '\U00000024', - "dopf;": '\U0001D555', - "dot;": '\U000002D9', - "doteq;": '\U00002250', - "doteqdot;": '\U00002251', - "dotminus;": '\U00002238', - "dotplus;": '\U00002214', - "dotsquare;": '\U000022A1', - "doublebarwedge;": '\U00002306', - "downarrow;": '\U00002193', - "downdownarrows;": '\U000021CA', - "downharpoonleft;": '\U000021C3', - "downharpoonright;": '\U000021C2', - "drbkarow;": '\U00002910', - "drcorn;": '\U0000231F', - "drcrop;": '\U0000230C', - "dscr;": '\U0001D4B9', - "dscy;": '\U00000455', - "dsol;": '\U000029F6', - "dstrok;": '\U00000111', - "dtdot;": '\U000022F1', - "dtri;": '\U000025BF', - "dtrif;": '\U000025BE', - "duarr;": '\U000021F5', - "duhar;": '\U0000296F', - "dwangle;": '\U000029A6', - "dzcy;": '\U0000045F', - "dzigrarr;": '\U000027FF', - "eDDot;": '\U00002A77', - "eDot;": '\U00002251', - "eacute;": '\U000000E9', - "easter;": '\U00002A6E', - "ecaron;": '\U0000011B', - "ecir;": '\U00002256', - "ecirc;": '\U000000EA', - "ecolon;": '\U00002255', - "ecy;": '\U0000044D', - "edot;": '\U00000117', - "ee;": '\U00002147', - "efDot;": '\U00002252', - "efr;": '\U0001D522', - "eg;": '\U00002A9A', - "egrave;": '\U000000E8', - "egs;": '\U00002A96', - "egsdot;": '\U00002A98', - "el;": '\U00002A99', - "elinters;": '\U000023E7', - "ell;": '\U00002113', - "els;": '\U00002A95', - "elsdot;": '\U00002A97', - "emacr;": '\U00000113', - "empty;": '\U00002205', - "emptyset;": '\U00002205', - "emptyv;": '\U00002205', - "emsp;": '\U00002003', - "emsp13;": '\U00002004', - "emsp14;": '\U00002005', - "eng;": '\U0000014B', - "ensp;": '\U00002002', - "eogon;": '\U00000119', - "eopf;": '\U0001D556', - "epar;": '\U000022D5', - "eparsl;": '\U000029E3', - "eplus;": '\U00002A71', - "epsi;": '\U000003B5', - "epsilon;": '\U000003B5', - "epsiv;": '\U000003F5', - "eqcirc;": '\U00002256', - "eqcolon;": '\U00002255', - "eqsim;": '\U00002242', - "eqslantgtr;": '\U00002A96', - "eqslantless;": '\U00002A95', - "equals;": '\U0000003D', - "equest;": '\U0000225F', - "equiv;": '\U00002261', - "equivDD;": '\U00002A78', - "eqvparsl;": '\U000029E5', - "erDot;": '\U00002253', - "erarr;": '\U00002971', - "escr;": '\U0000212F', - "esdot;": '\U00002250', - "esim;": '\U00002242', - "eta;": '\U000003B7', - "eth;": '\U000000F0', - "euml;": '\U000000EB', - "euro;": '\U000020AC', - "excl;": '\U00000021', - "exist;": '\U00002203', - "expectation;": '\U00002130', - "exponentiale;": '\U00002147', - "fallingdotseq;": '\U00002252', - "fcy;": '\U00000444', - "female;": '\U00002640', - "ffilig;": '\U0000FB03', - "fflig;": '\U0000FB00', - "ffllig;": '\U0000FB04', - "ffr;": '\U0001D523', - "filig;": '\U0000FB01', - "flat;": '\U0000266D', - "fllig;": '\U0000FB02', - "fltns;": '\U000025B1', - "fnof;": '\U00000192', - "fopf;": '\U0001D557', - "forall;": '\U00002200', - "fork;": '\U000022D4', - "forkv;": '\U00002AD9', - "fpartint;": '\U00002A0D', - "frac12;": '\U000000BD', - "frac13;": '\U00002153', - "frac14;": '\U000000BC', - "frac15;": '\U00002155', - "frac16;": '\U00002159', - "frac18;": '\U0000215B', - "frac23;": '\U00002154', - "frac25;": '\U00002156', - "frac34;": '\U000000BE', - "frac35;": '\U00002157', - "frac38;": '\U0000215C', - "frac45;": '\U00002158', - "frac56;": '\U0000215A', - "frac58;": '\U0000215D', - "frac78;": '\U0000215E', - "frasl;": '\U00002044', - "frown;": '\U00002322', - "fscr;": '\U0001D4BB', - "gE;": '\U00002267', - "gEl;": '\U00002A8C', - "gacute;": '\U000001F5', - "gamma;": '\U000003B3', - "gammad;": '\U000003DD', - "gap;": '\U00002A86', - "gbreve;": '\U0000011F', - "gcirc;": '\U0000011D', - "gcy;": '\U00000433', - "gdot;": '\U00000121', - "ge;": '\U00002265', - "gel;": '\U000022DB', - "geq;": '\U00002265', - "geqq;": '\U00002267', - "geqslant;": '\U00002A7E', - "ges;": '\U00002A7E', - "gescc;": '\U00002AA9', - "gesdot;": '\U00002A80', - "gesdoto;": '\U00002A82', - "gesdotol;": '\U00002A84', - "gesles;": '\U00002A94', - "gfr;": '\U0001D524', - "gg;": '\U0000226B', - "ggg;": '\U000022D9', - "gimel;": '\U00002137', - "gjcy;": '\U00000453', - "gl;": '\U00002277', - "glE;": '\U00002A92', - "gla;": '\U00002AA5', - "glj;": '\U00002AA4', - "gnE;": '\U00002269', - "gnap;": '\U00002A8A', - "gnapprox;": '\U00002A8A', - "gne;": '\U00002A88', - "gneq;": '\U00002A88', - "gneqq;": '\U00002269', - "gnsim;": '\U000022E7', - "gopf;": '\U0001D558', - "grave;": '\U00000060', - "gscr;": '\U0000210A', - "gsim;": '\U00002273', - "gsime;": '\U00002A8E', - "gsiml;": '\U00002A90', - "gt;": '\U0000003E', - "gtcc;": '\U00002AA7', - "gtcir;": '\U00002A7A', - "gtdot;": '\U000022D7', - "gtlPar;": '\U00002995', - "gtquest;": '\U00002A7C', - "gtrapprox;": '\U00002A86', - "gtrarr;": '\U00002978', - "gtrdot;": '\U000022D7', - "gtreqless;": '\U000022DB', - "gtreqqless;": '\U00002A8C', - "gtrless;": '\U00002277', - "gtrsim;": '\U00002273', - "hArr;": '\U000021D4', - "hairsp;": '\U0000200A', - "half;": '\U000000BD', - "hamilt;": '\U0000210B', - "hardcy;": '\U0000044A', - "harr;": '\U00002194', - "harrcir;": '\U00002948', - "harrw;": '\U000021AD', - "hbar;": '\U0000210F', - "hcirc;": '\U00000125', - "hearts;": '\U00002665', - "heartsuit;": '\U00002665', - "hellip;": '\U00002026', - "hercon;": '\U000022B9', - "hfr;": '\U0001D525', - "hksearow;": '\U00002925', - "hkswarow;": '\U00002926', - "hoarr;": '\U000021FF', - "homtht;": '\U0000223B', - "hookleftarrow;": '\U000021A9', - "hookrightarrow;": '\U000021AA', - "hopf;": '\U0001D559', - "horbar;": '\U00002015', - "hscr;": '\U0001D4BD', - "hslash;": '\U0000210F', - "hstrok;": '\U00000127', - "hybull;": '\U00002043', - "hyphen;": '\U00002010', - "iacute;": '\U000000ED', - "ic;": '\U00002063', - "icirc;": '\U000000EE', - "icy;": '\U00000438', - "iecy;": '\U00000435', - "iexcl;": '\U000000A1', - "iff;": '\U000021D4', - "ifr;": '\U0001D526', - "igrave;": '\U000000EC', - "ii;": '\U00002148', - "iiiint;": '\U00002A0C', - "iiint;": '\U0000222D', - "iinfin;": '\U000029DC', - "iiota;": '\U00002129', - "ijlig;": '\U00000133', - "imacr;": '\U0000012B', - "image;": '\U00002111', - "imagline;": '\U00002110', - "imagpart;": '\U00002111', - "imath;": '\U00000131', - "imof;": '\U000022B7', - "imped;": '\U000001B5', - "in;": '\U00002208', - "incare;": '\U00002105', - "infin;": '\U0000221E', - "infintie;": '\U000029DD', - "inodot;": '\U00000131', - "int;": '\U0000222B', - "intcal;": '\U000022BA', - "integers;": '\U00002124', - "intercal;": '\U000022BA', - "intlarhk;": '\U00002A17', - "intprod;": '\U00002A3C', - "iocy;": '\U00000451', - "iogon;": '\U0000012F', - "iopf;": '\U0001D55A', - "iota;": '\U000003B9', - "iprod;": '\U00002A3C', - "iquest;": '\U000000BF', - "iscr;": '\U0001D4BE', - "isin;": '\U00002208', - "isinE;": '\U000022F9', - "isindot;": '\U000022F5', - "isins;": '\U000022F4', - "isinsv;": '\U000022F3', - "isinv;": '\U00002208', - "it;": '\U00002062', - "itilde;": '\U00000129', - "iukcy;": '\U00000456', - "iuml;": '\U000000EF', - "jcirc;": '\U00000135', - "jcy;": '\U00000439', - "jfr;": '\U0001D527', - "jmath;": '\U00000237', - "jopf;": '\U0001D55B', - "jscr;": '\U0001D4BF', - "jsercy;": '\U00000458', - "jukcy;": '\U00000454', - "kappa;": '\U000003BA', - "kappav;": '\U000003F0', - "kcedil;": '\U00000137', - "kcy;": '\U0000043A', - "kfr;": '\U0001D528', - "kgreen;": '\U00000138', - "khcy;": '\U00000445', - "kjcy;": '\U0000045C', - "kopf;": '\U0001D55C', - "kscr;": '\U0001D4C0', - "lAarr;": '\U000021DA', - "lArr;": '\U000021D0', - "lAtail;": '\U0000291B', - "lBarr;": '\U0000290E', - "lE;": '\U00002266', - "lEg;": '\U00002A8B', - "lHar;": '\U00002962', - "lacute;": '\U0000013A', - "laemptyv;": '\U000029B4', - "lagran;": '\U00002112', - "lambda;": '\U000003BB', - "lang;": '\U000027E8', - "langd;": '\U00002991', - "langle;": '\U000027E8', - "lap;": '\U00002A85', - "laquo;": '\U000000AB', - "larr;": '\U00002190', - "larrb;": '\U000021E4', - "larrbfs;": '\U0000291F', - "larrfs;": '\U0000291D', - "larrhk;": '\U000021A9', - "larrlp;": '\U000021AB', - "larrpl;": '\U00002939', - "larrsim;": '\U00002973', - "larrtl;": '\U000021A2', - "lat;": '\U00002AAB', - "latail;": '\U00002919', - "late;": '\U00002AAD', - "lbarr;": '\U0000290C', - "lbbrk;": '\U00002772', - "lbrace;": '\U0000007B', - "lbrack;": '\U0000005B', - "lbrke;": '\U0000298B', - "lbrksld;": '\U0000298F', - "lbrkslu;": '\U0000298D', - "lcaron;": '\U0000013E', - "lcedil;": '\U0000013C', - "lceil;": '\U00002308', - "lcub;": '\U0000007B', - "lcy;": '\U0000043B', - "ldca;": '\U00002936', - "ldquo;": '\U0000201C', - "ldquor;": '\U0000201E', - "ldrdhar;": '\U00002967', - "ldrushar;": '\U0000294B', - "ldsh;": '\U000021B2', - "le;": '\U00002264', - "leftarrow;": '\U00002190', - "leftarrowtail;": '\U000021A2', - "leftharpoondown;": '\U000021BD', - "leftharpoonup;": '\U000021BC', - "leftleftarrows;": '\U000021C7', - "leftrightarrow;": '\U00002194', - "leftrightarrows;": '\U000021C6', - "leftrightharpoons;": '\U000021CB', - "leftrightsquigarrow;": '\U000021AD', - "leftthreetimes;": '\U000022CB', - "leg;": '\U000022DA', - "leq;": '\U00002264', - "leqq;": '\U00002266', - "leqslant;": '\U00002A7D', - "les;": '\U00002A7D', - "lescc;": '\U00002AA8', - "lesdot;": '\U00002A7F', - "lesdoto;": '\U00002A81', - "lesdotor;": '\U00002A83', - "lesges;": '\U00002A93', - "lessapprox;": '\U00002A85', - "lessdot;": '\U000022D6', - "lesseqgtr;": '\U000022DA', - "lesseqqgtr;": '\U00002A8B', - "lessgtr;": '\U00002276', - "lesssim;": '\U00002272', - "lfisht;": '\U0000297C', - "lfloor;": '\U0000230A', - "lfr;": '\U0001D529', - "lg;": '\U00002276', - "lgE;": '\U00002A91', - "lhard;": '\U000021BD', - "lharu;": '\U000021BC', - "lharul;": '\U0000296A', - "lhblk;": '\U00002584', - "ljcy;": '\U00000459', - "ll;": '\U0000226A', - "llarr;": '\U000021C7', - "llcorner;": '\U0000231E', - "llhard;": '\U0000296B', - "lltri;": '\U000025FA', - "lmidot;": '\U00000140', - "lmoust;": '\U000023B0', - "lmoustache;": '\U000023B0', - "lnE;": '\U00002268', - "lnap;": '\U00002A89', - "lnapprox;": '\U00002A89', - "lne;": '\U00002A87', - "lneq;": '\U00002A87', - "lneqq;": '\U00002268', - "lnsim;": '\U000022E6', - "loang;": '\U000027EC', - "loarr;": '\U000021FD', - "lobrk;": '\U000027E6', - "longleftarrow;": '\U000027F5', - "longleftrightarrow;": '\U000027F7', - "longmapsto;": '\U000027FC', - "longrightarrow;": '\U000027F6', - "looparrowleft;": '\U000021AB', - "looparrowright;": '\U000021AC', - "lopar;": '\U00002985', - "lopf;": '\U0001D55D', - "loplus;": '\U00002A2D', - "lotimes;": '\U00002A34', - "lowast;": '\U00002217', - "lowbar;": '\U0000005F', - "loz;": '\U000025CA', - "lozenge;": '\U000025CA', - "lozf;": '\U000029EB', - "lpar;": '\U00000028', - "lparlt;": '\U00002993', - "lrarr;": '\U000021C6', - "lrcorner;": '\U0000231F', - "lrhar;": '\U000021CB', - "lrhard;": '\U0000296D', - "lrm;": '\U0000200E', - "lrtri;": '\U000022BF', - "lsaquo;": '\U00002039', - "lscr;": '\U0001D4C1', - "lsh;": '\U000021B0', - "lsim;": '\U00002272', - "lsime;": '\U00002A8D', - "lsimg;": '\U00002A8F', - "lsqb;": '\U0000005B', - "lsquo;": '\U00002018', - "lsquor;": '\U0000201A', - "lstrok;": '\U00000142', - "lt;": '\U0000003C', - "ltcc;": '\U00002AA6', - "ltcir;": '\U00002A79', - "ltdot;": '\U000022D6', - "lthree;": '\U000022CB', - "ltimes;": '\U000022C9', - "ltlarr;": '\U00002976', - "ltquest;": '\U00002A7B', - "ltrPar;": '\U00002996', - "ltri;": '\U000025C3', - "ltrie;": '\U000022B4', - "ltrif;": '\U000025C2', - "lurdshar;": '\U0000294A', - "luruhar;": '\U00002966', - "mDDot;": '\U0000223A', - "macr;": '\U000000AF', - "male;": '\U00002642', - "malt;": '\U00002720', - "maltese;": '\U00002720', - "map;": '\U000021A6', - "mapsto;": '\U000021A6', - "mapstodown;": '\U000021A7', - "mapstoleft;": '\U000021A4', - "mapstoup;": '\U000021A5', - "marker;": '\U000025AE', - "mcomma;": '\U00002A29', - "mcy;": '\U0000043C', - "mdash;": '\U00002014', - "measuredangle;": '\U00002221', - "mfr;": '\U0001D52A', - "mho;": '\U00002127', - "micro;": '\U000000B5', - "mid;": '\U00002223', - "midast;": '\U0000002A', - "midcir;": '\U00002AF0', - "middot;": '\U000000B7', - "minus;": '\U00002212', - "minusb;": '\U0000229F', - "minusd;": '\U00002238', - "minusdu;": '\U00002A2A', - "mlcp;": '\U00002ADB', - "mldr;": '\U00002026', - "mnplus;": '\U00002213', - "models;": '\U000022A7', - "mopf;": '\U0001D55E', - "mp;": '\U00002213', - "mscr;": '\U0001D4C2', - "mstpos;": '\U0000223E', - "mu;": '\U000003BC', - "multimap;": '\U000022B8', - "mumap;": '\U000022B8', - "nLeftarrow;": '\U000021CD', - "nLeftrightarrow;": '\U000021CE', - "nRightarrow;": '\U000021CF', - "nVDash;": '\U000022AF', - "nVdash;": '\U000022AE', - "nabla;": '\U00002207', - "nacute;": '\U00000144', - "nap;": '\U00002249', - "napos;": '\U00000149', - "napprox;": '\U00002249', - "natur;": '\U0000266E', - "natural;": '\U0000266E', - "naturals;": '\U00002115', - "nbsp;": '\U000000A0', - "ncap;": '\U00002A43', - "ncaron;": '\U00000148', - "ncedil;": '\U00000146', - "ncong;": '\U00002247', - "ncup;": '\U00002A42', - "ncy;": '\U0000043D', - "ndash;": '\U00002013', - "ne;": '\U00002260', - "neArr;": '\U000021D7', - "nearhk;": '\U00002924', - "nearr;": '\U00002197', - "nearrow;": '\U00002197', - "nequiv;": '\U00002262', - "nesear;": '\U00002928', - "nexist;": '\U00002204', - "nexists;": '\U00002204', - "nfr;": '\U0001D52B', - "nge;": '\U00002271', - "ngeq;": '\U00002271', - "ngsim;": '\U00002275', - "ngt;": '\U0000226F', - "ngtr;": '\U0000226F', - "nhArr;": '\U000021CE', - "nharr;": '\U000021AE', - "nhpar;": '\U00002AF2', - "ni;": '\U0000220B', - "nis;": '\U000022FC', - "nisd;": '\U000022FA', - "niv;": '\U0000220B', - "njcy;": '\U0000045A', - "nlArr;": '\U000021CD', - "nlarr;": '\U0000219A', - "nldr;": '\U00002025', - "nle;": '\U00002270', - "nleftarrow;": '\U0000219A', - "nleftrightarrow;": '\U000021AE', - "nleq;": '\U00002270', - "nless;": '\U0000226E', - "nlsim;": '\U00002274', - "nlt;": '\U0000226E', - "nltri;": '\U000022EA', - "nltrie;": '\U000022EC', - "nmid;": '\U00002224', - "nopf;": '\U0001D55F', - "not;": '\U000000AC', - "notin;": '\U00002209', - "notinva;": '\U00002209', - "notinvb;": '\U000022F7', - "notinvc;": '\U000022F6', - "notni;": '\U0000220C', - "notniva;": '\U0000220C', - "notnivb;": '\U000022FE', - "notnivc;": '\U000022FD', - "npar;": '\U00002226', - "nparallel;": '\U00002226', - "npolint;": '\U00002A14', - "npr;": '\U00002280', - "nprcue;": '\U000022E0', - "nprec;": '\U00002280', - "nrArr;": '\U000021CF', - "nrarr;": '\U0000219B', - "nrightarrow;": '\U0000219B', - "nrtri;": '\U000022EB', - "nrtrie;": '\U000022ED', - "nsc;": '\U00002281', - "nsccue;": '\U000022E1', - "nscr;": '\U0001D4C3', - "nshortmid;": '\U00002224', - "nshortparallel;": '\U00002226', - "nsim;": '\U00002241', - "nsime;": '\U00002244', - "nsimeq;": '\U00002244', - "nsmid;": '\U00002224', - "nspar;": '\U00002226', - "nsqsube;": '\U000022E2', - "nsqsupe;": '\U000022E3', - "nsub;": '\U00002284', - "nsube;": '\U00002288', - "nsubseteq;": '\U00002288', - "nsucc;": '\U00002281', - "nsup;": '\U00002285', - "nsupe;": '\U00002289', - "nsupseteq;": '\U00002289', - "ntgl;": '\U00002279', - "ntilde;": '\U000000F1', - "ntlg;": '\U00002278', - "ntriangleleft;": '\U000022EA', - "ntrianglelefteq;": '\U000022EC', - "ntriangleright;": '\U000022EB', - "ntrianglerighteq;": '\U000022ED', - "nu;": '\U000003BD', - "num;": '\U00000023', - "numero;": '\U00002116', - "numsp;": '\U00002007', - "nvDash;": '\U000022AD', - "nvHarr;": '\U00002904', - "nvdash;": '\U000022AC', - "nvinfin;": '\U000029DE', - "nvlArr;": '\U00002902', - "nvrArr;": '\U00002903', - "nwArr;": '\U000021D6', - "nwarhk;": '\U00002923', - "nwarr;": '\U00002196', - "nwarrow;": '\U00002196', - "nwnear;": '\U00002927', - "oS;": '\U000024C8', - "oacute;": '\U000000F3', - "oast;": '\U0000229B', - "ocir;": '\U0000229A', - "ocirc;": '\U000000F4', - "ocy;": '\U0000043E', - "odash;": '\U0000229D', - "odblac;": '\U00000151', - "odiv;": '\U00002A38', - "odot;": '\U00002299', - "odsold;": '\U000029BC', - "oelig;": '\U00000153', - "ofcir;": '\U000029BF', - "ofr;": '\U0001D52C', - "ogon;": '\U000002DB', - "ograve;": '\U000000F2', - "ogt;": '\U000029C1', - "ohbar;": '\U000029B5', - "ohm;": '\U000003A9', - "oint;": '\U0000222E', - "olarr;": '\U000021BA', - "olcir;": '\U000029BE', - "olcross;": '\U000029BB', - "oline;": '\U0000203E', - "olt;": '\U000029C0', - "omacr;": '\U0000014D', - "omega;": '\U000003C9', - "omicron;": '\U000003BF', - "omid;": '\U000029B6', - "ominus;": '\U00002296', - "oopf;": '\U0001D560', - "opar;": '\U000029B7', - "operp;": '\U000029B9', - "oplus;": '\U00002295', - "or;": '\U00002228', - "orarr;": '\U000021BB', - "ord;": '\U00002A5D', - "order;": '\U00002134', - "orderof;": '\U00002134', - "ordf;": '\U000000AA', - "ordm;": '\U000000BA', - "origof;": '\U000022B6', - "oror;": '\U00002A56', - "orslope;": '\U00002A57', - "orv;": '\U00002A5B', - "oscr;": '\U00002134', - "oslash;": '\U000000F8', - "osol;": '\U00002298', - "otilde;": '\U000000F5', - "otimes;": '\U00002297', - "otimesas;": '\U00002A36', - "ouml;": '\U000000F6', - "ovbar;": '\U0000233D', - "par;": '\U00002225', - "para;": '\U000000B6', - "parallel;": '\U00002225', - "parsim;": '\U00002AF3', - "parsl;": '\U00002AFD', - "part;": '\U00002202', - "pcy;": '\U0000043F', - "percnt;": '\U00000025', - "period;": '\U0000002E', - "permil;": '\U00002030', - "perp;": '\U000022A5', - "pertenk;": '\U00002031', - "pfr;": '\U0001D52D', - "phi;": '\U000003C6', - "phiv;": '\U000003D5', - "phmmat;": '\U00002133', - "phone;": '\U0000260E', - "pi;": '\U000003C0', - "pitchfork;": '\U000022D4', - "piv;": '\U000003D6', - "planck;": '\U0000210F', - "planckh;": '\U0000210E', - "plankv;": '\U0000210F', - "plus;": '\U0000002B', - "plusacir;": '\U00002A23', - "plusb;": '\U0000229E', - "pluscir;": '\U00002A22', - "plusdo;": '\U00002214', - "plusdu;": '\U00002A25', - "pluse;": '\U00002A72', - "plusmn;": '\U000000B1', - "plussim;": '\U00002A26', - "plustwo;": '\U00002A27', - "pm;": '\U000000B1', - "pointint;": '\U00002A15', - "popf;": '\U0001D561', - "pound;": '\U000000A3', - "pr;": '\U0000227A', - "prE;": '\U00002AB3', - "prap;": '\U00002AB7', - "prcue;": '\U0000227C', - "pre;": '\U00002AAF', - "prec;": '\U0000227A', - "precapprox;": '\U00002AB7', - "preccurlyeq;": '\U0000227C', - "preceq;": '\U00002AAF', - "precnapprox;": '\U00002AB9', - "precneqq;": '\U00002AB5', - "precnsim;": '\U000022E8', - "precsim;": '\U0000227E', - "prime;": '\U00002032', - "primes;": '\U00002119', - "prnE;": '\U00002AB5', - "prnap;": '\U00002AB9', - "prnsim;": '\U000022E8', - "prod;": '\U0000220F', - "profalar;": '\U0000232E', - "profline;": '\U00002312', - "profsurf;": '\U00002313', - "prop;": '\U0000221D', - "propto;": '\U0000221D', - "prsim;": '\U0000227E', - "prurel;": '\U000022B0', - "pscr;": '\U0001D4C5', - "psi;": '\U000003C8', - "puncsp;": '\U00002008', - "qfr;": '\U0001D52E', - "qint;": '\U00002A0C', - "qopf;": '\U0001D562', - "qprime;": '\U00002057', - "qscr;": '\U0001D4C6', - "quaternions;": '\U0000210D', - "quatint;": '\U00002A16', - "quest;": '\U0000003F', - "questeq;": '\U0000225F', - "quot;": '\U00000022', - "rAarr;": '\U000021DB', - "rArr;": '\U000021D2', - "rAtail;": '\U0000291C', - "rBarr;": '\U0000290F', - "rHar;": '\U00002964', - "racute;": '\U00000155', - "radic;": '\U0000221A', - "raemptyv;": '\U000029B3', - "rang;": '\U000027E9', - "rangd;": '\U00002992', - "range;": '\U000029A5', - "rangle;": '\U000027E9', - "raquo;": '\U000000BB', - "rarr;": '\U00002192', - "rarrap;": '\U00002975', - "rarrb;": '\U000021E5', - "rarrbfs;": '\U00002920', - "rarrc;": '\U00002933', - "rarrfs;": '\U0000291E', - "rarrhk;": '\U000021AA', - "rarrlp;": '\U000021AC', - "rarrpl;": '\U00002945', - "rarrsim;": '\U00002974', - "rarrtl;": '\U000021A3', - "rarrw;": '\U0000219D', - "ratail;": '\U0000291A', - "ratio;": '\U00002236', - "rationals;": '\U0000211A', - "rbarr;": '\U0000290D', - "rbbrk;": '\U00002773', - "rbrace;": '\U0000007D', - "rbrack;": '\U0000005D', - "rbrke;": '\U0000298C', - "rbrksld;": '\U0000298E', - "rbrkslu;": '\U00002990', - "rcaron;": '\U00000159', - "rcedil;": '\U00000157', - "rceil;": '\U00002309', - "rcub;": '\U0000007D', - "rcy;": '\U00000440', - "rdca;": '\U00002937', - "rdldhar;": '\U00002969', - "rdquo;": '\U0000201D', - "rdquor;": '\U0000201D', - "rdsh;": '\U000021B3', - "real;": '\U0000211C', - "realine;": '\U0000211B', - "realpart;": '\U0000211C', - "reals;": '\U0000211D', - "rect;": '\U000025AD', - "reg;": '\U000000AE', - "rfisht;": '\U0000297D', - "rfloor;": '\U0000230B', - "rfr;": '\U0001D52F', - "rhard;": '\U000021C1', - "rharu;": '\U000021C0', - "rharul;": '\U0000296C', - "rho;": '\U000003C1', - "rhov;": '\U000003F1', - "rightarrow;": '\U00002192', - "rightarrowtail;": '\U000021A3', - "rightharpoondown;": '\U000021C1', - "rightharpoonup;": '\U000021C0', - "rightleftarrows;": '\U000021C4', - "rightleftharpoons;": '\U000021CC', - "rightrightarrows;": '\U000021C9', - "rightsquigarrow;": '\U0000219D', - "rightthreetimes;": '\U000022CC', - "ring;": '\U000002DA', - "risingdotseq;": '\U00002253', - "rlarr;": '\U000021C4', - "rlhar;": '\U000021CC', - "rlm;": '\U0000200F', - "rmoust;": '\U000023B1', - "rmoustache;": '\U000023B1', - "rnmid;": '\U00002AEE', - "roang;": '\U000027ED', - "roarr;": '\U000021FE', - "robrk;": '\U000027E7', - "ropar;": '\U00002986', - "ropf;": '\U0001D563', - "roplus;": '\U00002A2E', - "rotimes;": '\U00002A35', - "rpar;": '\U00000029', - "rpargt;": '\U00002994', - "rppolint;": '\U00002A12', - "rrarr;": '\U000021C9', - "rsaquo;": '\U0000203A', - "rscr;": '\U0001D4C7', - "rsh;": '\U000021B1', - "rsqb;": '\U0000005D', - "rsquo;": '\U00002019', - "rsquor;": '\U00002019', - "rthree;": '\U000022CC', - "rtimes;": '\U000022CA', - "rtri;": '\U000025B9', - "rtrie;": '\U000022B5', - "rtrif;": '\U000025B8', - "rtriltri;": '\U000029CE', - "ruluhar;": '\U00002968', - "rx;": '\U0000211E', - "sacute;": '\U0000015B', - "sbquo;": '\U0000201A', - "sc;": '\U0000227B', - "scE;": '\U00002AB4', - "scap;": '\U00002AB8', - "scaron;": '\U00000161', - "sccue;": '\U0000227D', - "sce;": '\U00002AB0', - "scedil;": '\U0000015F', - "scirc;": '\U0000015D', - "scnE;": '\U00002AB6', - "scnap;": '\U00002ABA', - "scnsim;": '\U000022E9', - "scpolint;": '\U00002A13', - "scsim;": '\U0000227F', - "scy;": '\U00000441', - "sdot;": '\U000022C5', - "sdotb;": '\U000022A1', - "sdote;": '\U00002A66', - "seArr;": '\U000021D8', - "searhk;": '\U00002925', - "searr;": '\U00002198', - "searrow;": '\U00002198', - "sect;": '\U000000A7', - "semi;": '\U0000003B', - "seswar;": '\U00002929', - "setminus;": '\U00002216', - "setmn;": '\U00002216', - "sext;": '\U00002736', - "sfr;": '\U0001D530', - "sfrown;": '\U00002322', - "sharp;": '\U0000266F', - "shchcy;": '\U00000449', - "shcy;": '\U00000448', - "shortmid;": '\U00002223', - "shortparallel;": '\U00002225', - "shy;": '\U000000AD', - "sigma;": '\U000003C3', - "sigmaf;": '\U000003C2', - "sigmav;": '\U000003C2', - "sim;": '\U0000223C', - "simdot;": '\U00002A6A', - "sime;": '\U00002243', - "simeq;": '\U00002243', - "simg;": '\U00002A9E', - "simgE;": '\U00002AA0', - "siml;": '\U00002A9D', - "simlE;": '\U00002A9F', - "simne;": '\U00002246', - "simplus;": '\U00002A24', - "simrarr;": '\U00002972', - "slarr;": '\U00002190', - "smallsetminus;": '\U00002216', - "smashp;": '\U00002A33', - "smeparsl;": '\U000029E4', - "smid;": '\U00002223', - "smile;": '\U00002323', - "smt;": '\U00002AAA', - "smte;": '\U00002AAC', - "softcy;": '\U0000044C', - "sol;": '\U0000002F', - "solb;": '\U000029C4', - "solbar;": '\U0000233F', - "sopf;": '\U0001D564', - "spades;": '\U00002660', - "spadesuit;": '\U00002660', - "spar;": '\U00002225', - "sqcap;": '\U00002293', - "sqcup;": '\U00002294', - "sqsub;": '\U0000228F', - "sqsube;": '\U00002291', - "sqsubset;": '\U0000228F', - "sqsubseteq;": '\U00002291', - "sqsup;": '\U00002290', - "sqsupe;": '\U00002292', - "sqsupset;": '\U00002290', - "sqsupseteq;": '\U00002292', - "squ;": '\U000025A1', - "square;": '\U000025A1', - "squarf;": '\U000025AA', - "squf;": '\U000025AA', - "srarr;": '\U00002192', - "sscr;": '\U0001D4C8', - "ssetmn;": '\U00002216', - "ssmile;": '\U00002323', - "sstarf;": '\U000022C6', - "star;": '\U00002606', - "starf;": '\U00002605', - "straightepsilon;": '\U000003F5', - "straightphi;": '\U000003D5', - "strns;": '\U000000AF', - "sub;": '\U00002282', - "subE;": '\U00002AC5', - "subdot;": '\U00002ABD', - "sube;": '\U00002286', - "subedot;": '\U00002AC3', - "submult;": '\U00002AC1', - "subnE;": '\U00002ACB', - "subne;": '\U0000228A', - "subplus;": '\U00002ABF', - "subrarr;": '\U00002979', - "subset;": '\U00002282', - "subseteq;": '\U00002286', - "subseteqq;": '\U00002AC5', - "subsetneq;": '\U0000228A', - "subsetneqq;": '\U00002ACB', - "subsim;": '\U00002AC7', - "subsub;": '\U00002AD5', - "subsup;": '\U00002AD3', - "succ;": '\U0000227B', - "succapprox;": '\U00002AB8', - "succcurlyeq;": '\U0000227D', - "succeq;": '\U00002AB0', - "succnapprox;": '\U00002ABA', - "succneqq;": '\U00002AB6', - "succnsim;": '\U000022E9', - "succsim;": '\U0000227F', - "sum;": '\U00002211', - "sung;": '\U0000266A', - "sup;": '\U00002283', - "sup1;": '\U000000B9', - "sup2;": '\U000000B2', - "sup3;": '\U000000B3', - "supE;": '\U00002AC6', - "supdot;": '\U00002ABE', - "supdsub;": '\U00002AD8', - "supe;": '\U00002287', - "supedot;": '\U00002AC4', - "suphsol;": '\U000027C9', - "suphsub;": '\U00002AD7', - "suplarr;": '\U0000297B', - "supmult;": '\U00002AC2', - "supnE;": '\U00002ACC', - "supne;": '\U0000228B', - "supplus;": '\U00002AC0', - "supset;": '\U00002283', - "supseteq;": '\U00002287', - "supseteqq;": '\U00002AC6', - "supsetneq;": '\U0000228B', - "supsetneqq;": '\U00002ACC', - "supsim;": '\U00002AC8', - "supsub;": '\U00002AD4', - "supsup;": '\U00002AD6', - "swArr;": '\U000021D9', - "swarhk;": '\U00002926', - "swarr;": '\U00002199', - "swarrow;": '\U00002199', - "swnwar;": '\U0000292A', - "szlig;": '\U000000DF', - "target;": '\U00002316', - "tau;": '\U000003C4', - "tbrk;": '\U000023B4', - "tcaron;": '\U00000165', - "tcedil;": '\U00000163', - "tcy;": '\U00000442', - "tdot;": '\U000020DB', - "telrec;": '\U00002315', - "tfr;": '\U0001D531', - "there4;": '\U00002234', - "therefore;": '\U00002234', - "theta;": '\U000003B8', - "thetasym;": '\U000003D1', - "thetav;": '\U000003D1', - "thickapprox;": '\U00002248', - "thicksim;": '\U0000223C', - "thinsp;": '\U00002009', - "thkap;": '\U00002248', - "thksim;": '\U0000223C', - "thorn;": '\U000000FE', - "tilde;": '\U000002DC', - "times;": '\U000000D7', - "timesb;": '\U000022A0', - "timesbar;": '\U00002A31', - "timesd;": '\U00002A30', - "tint;": '\U0000222D', - "toea;": '\U00002928', - "top;": '\U000022A4', - "topbot;": '\U00002336', - "topcir;": '\U00002AF1', - "topf;": '\U0001D565', - "topfork;": '\U00002ADA', - "tosa;": '\U00002929', - "tprime;": '\U00002034', - "trade;": '\U00002122', - "triangle;": '\U000025B5', - "triangledown;": '\U000025BF', - "triangleleft;": '\U000025C3', - "trianglelefteq;": '\U000022B4', - "triangleq;": '\U0000225C', - "triangleright;": '\U000025B9', - "trianglerighteq;": '\U000022B5', - "tridot;": '\U000025EC', - "trie;": '\U0000225C', - "triminus;": '\U00002A3A', - "triplus;": '\U00002A39', - "trisb;": '\U000029CD', - "tritime;": '\U00002A3B', - "trpezium;": '\U000023E2', - "tscr;": '\U0001D4C9', - "tscy;": '\U00000446', - "tshcy;": '\U0000045B', - "tstrok;": '\U00000167', - "twixt;": '\U0000226C', - "twoheadleftarrow;": '\U0000219E', - "twoheadrightarrow;": '\U000021A0', - "uArr;": '\U000021D1', - "uHar;": '\U00002963', - "uacute;": '\U000000FA', - "uarr;": '\U00002191', - "ubrcy;": '\U0000045E', - "ubreve;": '\U0000016D', - "ucirc;": '\U000000FB', - "ucy;": '\U00000443', - "udarr;": '\U000021C5', - "udblac;": '\U00000171', - "udhar;": '\U0000296E', - "ufisht;": '\U0000297E', - "ufr;": '\U0001D532', - "ugrave;": '\U000000F9', - "uharl;": '\U000021BF', - "uharr;": '\U000021BE', - "uhblk;": '\U00002580', - "ulcorn;": '\U0000231C', - "ulcorner;": '\U0000231C', - "ulcrop;": '\U0000230F', - "ultri;": '\U000025F8', - "umacr;": '\U0000016B', - "uml;": '\U000000A8', - "uogon;": '\U00000173', - "uopf;": '\U0001D566', - "uparrow;": '\U00002191', - "updownarrow;": '\U00002195', - "upharpoonleft;": '\U000021BF', - "upharpoonright;": '\U000021BE', - "uplus;": '\U0000228E', - "upsi;": '\U000003C5', - "upsih;": '\U000003D2', - "upsilon;": '\U000003C5', - "upuparrows;": '\U000021C8', - "urcorn;": '\U0000231D', - "urcorner;": '\U0000231D', - "urcrop;": '\U0000230E', - "uring;": '\U0000016F', - "urtri;": '\U000025F9', - "uscr;": '\U0001D4CA', - "utdot;": '\U000022F0', - "utilde;": '\U00000169', - "utri;": '\U000025B5', - "utrif;": '\U000025B4', - "uuarr;": '\U000021C8', - "uuml;": '\U000000FC', - "uwangle;": '\U000029A7', - "vArr;": '\U000021D5', - "vBar;": '\U00002AE8', - "vBarv;": '\U00002AE9', - "vDash;": '\U000022A8', - "vangrt;": '\U0000299C', - "varepsilon;": '\U000003F5', - "varkappa;": '\U000003F0', - "varnothing;": '\U00002205', - "varphi;": '\U000003D5', - "varpi;": '\U000003D6', - "varpropto;": '\U0000221D', - "varr;": '\U00002195', - "varrho;": '\U000003F1', - "varsigma;": '\U000003C2', - "vartheta;": '\U000003D1', - "vartriangleleft;": '\U000022B2', - "vartriangleright;": '\U000022B3', - "vcy;": '\U00000432', - "vdash;": '\U000022A2', - "vee;": '\U00002228', - "veebar;": '\U000022BB', - "veeeq;": '\U0000225A', - "vellip;": '\U000022EE', - "verbar;": '\U0000007C', - "vert;": '\U0000007C', - "vfr;": '\U0001D533', - "vltri;": '\U000022B2', - "vopf;": '\U0001D567', - "vprop;": '\U0000221D', - "vrtri;": '\U000022B3', - "vscr;": '\U0001D4CB', - "vzigzag;": '\U0000299A', - "wcirc;": '\U00000175', - "wedbar;": '\U00002A5F', - "wedge;": '\U00002227', - "wedgeq;": '\U00002259', - "weierp;": '\U00002118', - "wfr;": '\U0001D534', - "wopf;": '\U0001D568', - "wp;": '\U00002118', - "wr;": '\U00002240', - "wreath;": '\U00002240', - "wscr;": '\U0001D4CC', - "xcap;": '\U000022C2', - "xcirc;": '\U000025EF', - "xcup;": '\U000022C3', - "xdtri;": '\U000025BD', - "xfr;": '\U0001D535', - "xhArr;": '\U000027FA', - "xharr;": '\U000027F7', - "xi;": '\U000003BE', - "xlArr;": '\U000027F8', - "xlarr;": '\U000027F5', - "xmap;": '\U000027FC', - "xnis;": '\U000022FB', - "xodot;": '\U00002A00', - "xopf;": '\U0001D569', - "xoplus;": '\U00002A01', - "xotime;": '\U00002A02', - "xrArr;": '\U000027F9', - "xrarr;": '\U000027F6', - "xscr;": '\U0001D4CD', - "xsqcup;": '\U00002A06', - "xuplus;": '\U00002A04', - "xutri;": '\U000025B3', - "xvee;": '\U000022C1', - "xwedge;": '\U000022C0', - "yacute;": '\U000000FD', - "yacy;": '\U0000044F', - "ycirc;": '\U00000177', - "ycy;": '\U0000044B', - "yen;": '\U000000A5', - "yfr;": '\U0001D536', - "yicy;": '\U00000457', - "yopf;": '\U0001D56A', - "yscr;": '\U0001D4CE', - "yucy;": '\U0000044E', - "yuml;": '\U000000FF', - "zacute;": '\U0000017A', - "zcaron;": '\U0000017E', - "zcy;": '\U00000437', - "zdot;": '\U0000017C', - "zeetrf;": '\U00002128', - "zeta;": '\U000003B6', - "zfr;": '\U0001D537', - "zhcy;": '\U00000436', - "zigrarr;": '\U000021DD', - "zopf;": '\U0001D56B', - "zscr;": '\U0001D4CF', - "zwj;": '\U0000200D', - "zwnj;": '\U0000200C', - "AElig": '\U000000C6', - "AMP": '\U00000026', - "Aacute": '\U000000C1', - "Acirc": '\U000000C2', - "Agrave": '\U000000C0', - "Aring": '\U000000C5', - "Atilde": '\U000000C3', - "Auml": '\U000000C4', - "COPY": '\U000000A9', - "Ccedil": '\U000000C7', - "ETH": '\U000000D0', - "Eacute": '\U000000C9', - "Ecirc": '\U000000CA', - "Egrave": '\U000000C8', - "Euml": '\U000000CB', - "GT": '\U0000003E', - "Iacute": '\U000000CD', - "Icirc": '\U000000CE', - "Igrave": '\U000000CC', - "Iuml": '\U000000CF', - "LT": '\U0000003C', - "Ntilde": '\U000000D1', - "Oacute": '\U000000D3', - "Ocirc": '\U000000D4', - "Ograve": '\U000000D2', - "Oslash": '\U000000D8', - "Otilde": '\U000000D5', - "Ouml": '\U000000D6', - "QUOT": '\U00000022', - "REG": '\U000000AE', - "THORN": '\U000000DE', - "Uacute": '\U000000DA', - "Ucirc": '\U000000DB', - "Ugrave": '\U000000D9', - "Uuml": '\U000000DC', - "Yacute": '\U000000DD', - "aacute": '\U000000E1', - "acirc": '\U000000E2', - "acute": '\U000000B4', - "aelig": '\U000000E6', - "agrave": '\U000000E0', - "amp": '\U00000026', - "aring": '\U000000E5', - "atilde": '\U000000E3', - "auml": '\U000000E4', - "brvbar": '\U000000A6', - "ccedil": '\U000000E7', - "cedil": '\U000000B8', - "cent": '\U000000A2', - "copy": '\U000000A9', - "curren": '\U000000A4', - "deg": '\U000000B0', - "divide": '\U000000F7', - "eacute": '\U000000E9', - "ecirc": '\U000000EA', - "egrave": '\U000000E8', - "eth": '\U000000F0', - "euml": '\U000000EB', - "frac12": '\U000000BD', - "frac14": '\U000000BC', - "frac34": '\U000000BE', - "gt": '\U0000003E', - "iacute": '\U000000ED', - "icirc": '\U000000EE', - "iexcl": '\U000000A1', - "igrave": '\U000000EC', - "iquest": '\U000000BF', - "iuml": '\U000000EF', - "laquo": '\U000000AB', - "lt": '\U0000003C', - "macr": '\U000000AF', - "micro": '\U000000B5', - "middot": '\U000000B7', - "nbsp": '\U000000A0', - "not": '\U000000AC', - "ntilde": '\U000000F1', - "oacute": '\U000000F3', - "ocirc": '\U000000F4', - "ograve": '\U000000F2', - "ordf": '\U000000AA', - "ordm": '\U000000BA', - "oslash": '\U000000F8', - "otilde": '\U000000F5', - "ouml": '\U000000F6', - "para": '\U000000B6', - "plusmn": '\U000000B1', - "pound": '\U000000A3', - "quot": '\U00000022', - "raquo": '\U000000BB', - "reg": '\U000000AE', - "sect": '\U000000A7', - "shy": '\U000000AD', - "sup1": '\U000000B9', - "sup2": '\U000000B2', - "sup3": '\U000000B3', - "szlig": '\U000000DF', - "thorn": '\U000000FE', - "times": '\U000000D7', - "uacute": '\U000000FA', - "ucirc": '\U000000FB', - "ugrave": '\U000000F9', - "uml": '\U000000A8', - "uuml": '\U000000FC', - "yacute": '\U000000FD', - "yen": '\U000000A5', - "yuml": '\U000000FF', -} - -// HTML entities that are two unicode codepoints. -var entity2 = map[string][2]rune{ - // TODO(nigeltao): Handle replacements that are wider than their names. - // "nLt;": {'\u226A', '\u20D2'}, - // "nGt;": {'\u226B', '\u20D2'}, - "NotEqualTilde;": {'\u2242', '\u0338'}, - "NotGreaterFullEqual;": {'\u2267', '\u0338'}, - "NotGreaterGreater;": {'\u226B', '\u0338'}, - "NotGreaterSlantEqual;": {'\u2A7E', '\u0338'}, - "NotHumpDownHump;": {'\u224E', '\u0338'}, - "NotHumpEqual;": {'\u224F', '\u0338'}, - "NotLeftTriangleBar;": {'\u29CF', '\u0338'}, - "NotLessLess;": {'\u226A', '\u0338'}, - "NotLessSlantEqual;": {'\u2A7D', '\u0338'}, - "NotNestedGreaterGreater;": {'\u2AA2', '\u0338'}, - "NotNestedLessLess;": {'\u2AA1', '\u0338'}, - "NotPrecedesEqual;": {'\u2AAF', '\u0338'}, - "NotRightTriangleBar;": {'\u29D0', '\u0338'}, - "NotSquareSubset;": {'\u228F', '\u0338'}, - "NotSquareSuperset;": {'\u2290', '\u0338'}, - "NotSubset;": {'\u2282', '\u20D2'}, - "NotSucceedsEqual;": {'\u2AB0', '\u0338'}, - "NotSucceedsTilde;": {'\u227F', '\u0338'}, - "NotSuperset;": {'\u2283', '\u20D2'}, - "ThickSpace;": {'\u205F', '\u200A'}, - "acE;": {'\u223E', '\u0333'}, - "bne;": {'\u003D', '\u20E5'}, - "bnequiv;": {'\u2261', '\u20E5'}, - "caps;": {'\u2229', '\uFE00'}, - "cups;": {'\u222A', '\uFE00'}, - "fjlig;": {'\u0066', '\u006A'}, - "gesl;": {'\u22DB', '\uFE00'}, - "gvertneqq;": {'\u2269', '\uFE00'}, - "gvnE;": {'\u2269', '\uFE00'}, - "lates;": {'\u2AAD', '\uFE00'}, - "lesg;": {'\u22DA', '\uFE00'}, - "lvertneqq;": {'\u2268', '\uFE00'}, - "lvnE;": {'\u2268', '\uFE00'}, - "nGg;": {'\u22D9', '\u0338'}, - "nGtv;": {'\u226B', '\u0338'}, - "nLl;": {'\u22D8', '\u0338'}, - "nLtv;": {'\u226A', '\u0338'}, - "nang;": {'\u2220', '\u20D2'}, - "napE;": {'\u2A70', '\u0338'}, - "napid;": {'\u224B', '\u0338'}, - "nbump;": {'\u224E', '\u0338'}, - "nbumpe;": {'\u224F', '\u0338'}, - "ncongdot;": {'\u2A6D', '\u0338'}, - "nedot;": {'\u2250', '\u0338'}, - "nesim;": {'\u2242', '\u0338'}, - "ngE;": {'\u2267', '\u0338'}, - "ngeqq;": {'\u2267', '\u0338'}, - "ngeqslant;": {'\u2A7E', '\u0338'}, - "nges;": {'\u2A7E', '\u0338'}, - "nlE;": {'\u2266', '\u0338'}, - "nleqq;": {'\u2266', '\u0338'}, - "nleqslant;": {'\u2A7D', '\u0338'}, - "nles;": {'\u2A7D', '\u0338'}, - "notinE;": {'\u22F9', '\u0338'}, - "notindot;": {'\u22F5', '\u0338'}, - "nparsl;": {'\u2AFD', '\u20E5'}, - "npart;": {'\u2202', '\u0338'}, - "npre;": {'\u2AAF', '\u0338'}, - "npreceq;": {'\u2AAF', '\u0338'}, - "nrarrc;": {'\u2933', '\u0338'}, - "nrarrw;": {'\u219D', '\u0338'}, - "nsce;": {'\u2AB0', '\u0338'}, - "nsubE;": {'\u2AC5', '\u0338'}, - "nsubset;": {'\u2282', '\u20D2'}, - "nsubseteqq;": {'\u2AC5', '\u0338'}, - "nsucceq;": {'\u2AB0', '\u0338'}, - "nsupE;": {'\u2AC6', '\u0338'}, - "nsupset;": {'\u2283', '\u20D2'}, - "nsupseteqq;": {'\u2AC6', '\u0338'}, - "nvap;": {'\u224D', '\u20D2'}, - "nvge;": {'\u2265', '\u20D2'}, - "nvgt;": {'\u003E', '\u20D2'}, - "nvle;": {'\u2264', '\u20D2'}, - "nvlt;": {'\u003C', '\u20D2'}, - "nvltrie;": {'\u22B4', '\u20D2'}, - "nvrtrie;": {'\u22B5', '\u20D2'}, - "nvsim;": {'\u223C', '\u20D2'}, - "race;": {'\u223D', '\u0331'}, - "smtes;": {'\u2AAC', '\uFE00'}, - "sqcaps;": {'\u2293', '\uFE00'}, - "sqcups;": {'\u2294', '\uFE00'}, - "varsubsetneq;": {'\u228A', '\uFE00'}, - "varsubsetneqq;": {'\u2ACB', '\uFE00'}, - "varsupsetneq;": {'\u228B', '\uFE00'}, - "varsupsetneqq;": {'\u2ACC', '\uFE00'}, - "vnsub;": {'\u2282', '\u20D2'}, - "vnsup;": {'\u2283', '\u20D2'}, - "vsubnE;": {'\u2ACB', '\uFE00'}, - "vsubne;": {'\u228A', '\uFE00'}, - "vsupnE;": {'\u2ACC', '\uFE00'}, - "vsupne;": {'\u228B', '\uFE00'}, -} diff --git a/vendor/golang.org/x/net/html/escape.go b/vendor/golang.org/x/net/html/escape.go deleted file mode 100644 index d856139..0000000 --- a/vendor/golang.org/x/net/html/escape.go +++ /dev/null @@ -1,258 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "bytes" - "strings" - "unicode/utf8" -) - -// These replacements permit compatibility with old numeric entities that -// assumed Windows-1252 encoding. -// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference -var replacementTable = [...]rune{ - '\u20AC', // First entry is what 0x80 should be replaced with. - '\u0081', - '\u201A', - '\u0192', - '\u201E', - '\u2026', - '\u2020', - '\u2021', - '\u02C6', - '\u2030', - '\u0160', - '\u2039', - '\u0152', - '\u008D', - '\u017D', - '\u008F', - '\u0090', - '\u2018', - '\u2019', - '\u201C', - '\u201D', - '\u2022', - '\u2013', - '\u2014', - '\u02DC', - '\u2122', - '\u0161', - '\u203A', - '\u0153', - '\u009D', - '\u017E', - '\u0178', // Last entry is 0x9F. - // 0x00->'\uFFFD' is handled programmatically. - // 0x0D->'\u000D' is a no-op. -} - -// unescapeEntity reads an entity like "<" from b[src:] and writes the -// corresponding "<" to b[dst:], returning the incremented dst and src cursors. -// Precondition: b[src] == '&' && dst <= src. -// attribute should be true if parsing an attribute value. -func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { - // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference - - // i starts at 1 because we already know that s[0] == '&'. - i, s := 1, b[src:] - - if len(s) <= 1 { - b[dst] = b[src] - return dst + 1, src + 1 - } - - if s[i] == '#' { - if len(s) <= 3 { // We need to have at least "&#.". - b[dst] = b[src] - return dst + 1, src + 1 - } - i++ - c := s[i] - hex := false - if c == 'x' || c == 'X' { - hex = true - i++ - } - - x := '\x00' - for i < len(s) { - c = s[i] - i++ - if hex { - if '0' <= c && c <= '9' { - x = 16*x + rune(c) - '0' - continue - } else if 'a' <= c && c <= 'f' { - x = 16*x + rune(c) - 'a' + 10 - continue - } else if 'A' <= c && c <= 'F' { - x = 16*x + rune(c) - 'A' + 10 - continue - } - } else if '0' <= c && c <= '9' { - x = 10*x + rune(c) - '0' - continue - } - if c != ';' { - i-- - } - break - } - - if i <= 3 { // No characters matched. - b[dst] = b[src] - return dst + 1, src + 1 - } - - if 0x80 <= x && x <= 0x9F { - // Replace characters from Windows-1252 with UTF-8 equivalents. - x = replacementTable[x-0x80] - } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF { - // Replace invalid characters with the replacement character. - x = '\uFFFD' - } - - return dst + utf8.EncodeRune(b[dst:], x), src + i - } - - // Consume the maximum number of characters possible, with the - // consumed characters matching one of the named references. - - for i < len(s) { - c := s[i] - i++ - // Lower-cased characters are more common in entities, so we check for them first. - if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { - continue - } - if c != ';' { - i-- - } - break - } - - entityName := string(s[1:i]) - if entityName == "" { - // No-op. - } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' { - // No-op. - } else if x := entity[entityName]; x != 0 { - return dst + utf8.EncodeRune(b[dst:], x), src + i - } else if x := entity2[entityName]; x[0] != 0 { - dst1 := dst + utf8.EncodeRune(b[dst:], x[0]) - return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i - } else if !attribute { - maxLen := len(entityName) - 1 - if maxLen > longestEntityWithoutSemicolon { - maxLen = longestEntityWithoutSemicolon - } - for j := maxLen; j > 1; j-- { - if x := entity[entityName[:j]]; x != 0 { - return dst + utf8.EncodeRune(b[dst:], x), src + j + 1 - } - } - } - - dst1, src1 = dst+i, src+i - copy(b[dst:dst1], b[src:src1]) - return dst1, src1 -} - -// unescape unescapes b's entities in-place, so that "a<b" becomes "a': - esc = ">" - case '"': - // """ is shorter than """. - esc = """ - case '\r': - esc = " " - default: - panic("unrecognized escape character") - } - s = s[i+1:] - if _, err := w.WriteString(esc); err != nil { - return err - } - i = strings.IndexAny(s, escapedChars) - } - _, err := w.WriteString(s) - return err -} - -// EscapeString escapes special characters like "<" to become "<". It -// escapes only five such characters: <, >, &, ' and ". -// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't -// always true. -func EscapeString(s string) string { - if strings.IndexAny(s, escapedChars) == -1 { - return s - } - var buf bytes.Buffer - escape(&buf, s) - return buf.String() -} - -// UnescapeString unescapes entities like "<" to become "<". It unescapes a -// larger range of entities than EscapeString escapes. For example, "á" -// unescapes to "á", as does "á" and "&xE1;". -// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't -// always true. -func UnescapeString(s string) string { - for _, c := range s { - if c == '&' { - return string(unescape([]byte(s), false)) - } - } - return s -} diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go deleted file mode 100644 index 9da9e9d..0000000 --- a/vendor/golang.org/x/net/html/foreign.go +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "strings" -) - -func adjustAttributeNames(aa []Attribute, nameMap map[string]string) { - for i := range aa { - if newName, ok := nameMap[aa[i].Key]; ok { - aa[i].Key = newName - } - } -} - -func adjustForeignAttributes(aa []Attribute) { - for i, a := range aa { - if a.Key == "" || a.Key[0] != 'x' { - continue - } - switch a.Key { - case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show", - "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink": - j := strings.Index(a.Key, ":") - aa[i].Namespace = a.Key[:j] - aa[i].Key = a.Key[j+1:] - } - } -} - -func htmlIntegrationPoint(n *Node) bool { - if n.Type != ElementNode { - return false - } - switch n.Namespace { - case "math": - if n.Data == "annotation-xml" { - for _, a := range n.Attr { - if a.Key == "encoding" { - val := strings.ToLower(a.Val) - if val == "text/html" || val == "application/xhtml+xml" { - return true - } - } - } - } - case "svg": - switch n.Data { - case "desc", "foreignObject", "title": - return true - } - } - return false -} - -func mathMLTextIntegrationPoint(n *Node) bool { - if n.Namespace != "math" { - return false - } - switch n.Data { - case "mi", "mo", "mn", "ms", "mtext": - return true - } - return false -} - -// Section 12.2.6.5. -var breakout = map[string]bool{ - "b": true, - "big": true, - "blockquote": true, - "body": true, - "br": true, - "center": true, - "code": true, - "dd": true, - "div": true, - "dl": true, - "dt": true, - "em": true, - "embed": true, - "h1": true, - "h2": true, - "h3": true, - "h4": true, - "h5": true, - "h6": true, - "head": true, - "hr": true, - "i": true, - "img": true, - "li": true, - "listing": true, - "menu": true, - "meta": true, - "nobr": true, - "ol": true, - "p": true, - "pre": true, - "ruby": true, - "s": true, - "small": true, - "span": true, - "strong": true, - "strike": true, - "sub": true, - "sup": true, - "table": true, - "tt": true, - "u": true, - "ul": true, - "var": true, -} - -// Section 12.2.6.5. -var svgTagNameAdjustments = map[string]string{ - "altglyph": "altGlyph", - "altglyphdef": "altGlyphDef", - "altglyphitem": "altGlyphItem", - "animatecolor": "animateColor", - "animatemotion": "animateMotion", - "animatetransform": "animateTransform", - "clippath": "clipPath", - "feblend": "feBlend", - "fecolormatrix": "feColorMatrix", - "fecomponenttransfer": "feComponentTransfer", - "fecomposite": "feComposite", - "feconvolvematrix": "feConvolveMatrix", - "fediffuselighting": "feDiffuseLighting", - "fedisplacementmap": "feDisplacementMap", - "fedistantlight": "feDistantLight", - "feflood": "feFlood", - "fefunca": "feFuncA", - "fefuncb": "feFuncB", - "fefuncg": "feFuncG", - "fefuncr": "feFuncR", - "fegaussianblur": "feGaussianBlur", - "feimage": "feImage", - "femerge": "feMerge", - "femergenode": "feMergeNode", - "femorphology": "feMorphology", - "feoffset": "feOffset", - "fepointlight": "fePointLight", - "fespecularlighting": "feSpecularLighting", - "fespotlight": "feSpotLight", - "fetile": "feTile", - "feturbulence": "feTurbulence", - "foreignobject": "foreignObject", - "glyphref": "glyphRef", - "lineargradient": "linearGradient", - "radialgradient": "radialGradient", - "textpath": "textPath", -} - -// Section 12.2.6.1 -var mathMLAttributeAdjustments = map[string]string{ - "definitionurl": "definitionURL", -} - -var svgAttributeAdjustments = map[string]string{ - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan", -} diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go deleted file mode 100644 index 1350eef..0000000 --- a/vendor/golang.org/x/net/html/node.go +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "golang.org/x/net/html/atom" -) - -// A NodeType is the type of a Node. -type NodeType uint32 - -const ( - ErrorNode NodeType = iota - TextNode - DocumentNode - ElementNode - CommentNode - DoctypeNode - // RawNode nodes are not returned by the parser, but can be part of the - // Node tree passed to func Render to insert raw HTML (without escaping). - // If so, this package makes no guarantee that the rendered HTML is secure - // (from e.g. Cross Site Scripting attacks) or well-formed. - RawNode - scopeMarkerNode -) - -// Section 12.2.4.3 says "The markers are inserted when entering applet, -// object, marquee, template, td, th, and caption elements, and are used -// to prevent formatting from "leaking" into applet, object, marquee, -// template, td, th, and caption elements". -var scopeMarker = Node{Type: scopeMarkerNode} - -// A Node consists of a NodeType and some Data (tag name for element nodes, -// content for text) and are part of a tree of Nodes. Element nodes may also -// have a Namespace and contain a slice of Attributes. Data is unescaped, so -// that it looks like "a 0 { - return (*s)[i-1] - } - return nil -} - -// index returns the index of the top-most occurrence of n in the stack, or -1 -// if n is not present. -func (s *nodeStack) index(n *Node) int { - for i := len(*s) - 1; i >= 0; i-- { - if (*s)[i] == n { - return i - } - } - return -1 -} - -// contains returns whether a is within s. -func (s *nodeStack) contains(a atom.Atom) bool { - for _, n := range *s { - if n.DataAtom == a && n.Namespace == "" { - return true - } - } - return false -} - -// insert inserts a node at the given index. -func (s *nodeStack) insert(i int, n *Node) { - (*s) = append(*s, nil) - copy((*s)[i+1:], (*s)[i:]) - (*s)[i] = n -} - -// remove removes a node from the stack. It is a no-op if n is not present. -func (s *nodeStack) remove(n *Node) { - i := s.index(n) - if i == -1 { - return - } - copy((*s)[i:], (*s)[i+1:]) - j := len(*s) - 1 - (*s)[j] = nil - *s = (*s)[:j] -} - -type insertionModeStack []insertionMode - -func (s *insertionModeStack) pop() (im insertionMode) { - i := len(*s) - im = (*s)[i-1] - *s = (*s)[:i-1] - return im -} - -func (s *insertionModeStack) top() insertionMode { - if i := len(*s); i > 0 { - return (*s)[i-1] - } - return nil -} diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go deleted file mode 100644 index 038941d..0000000 --- a/vendor/golang.org/x/net/html/parse.go +++ /dev/null @@ -1,2460 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "errors" - "fmt" - "io" - "strings" - - a "golang.org/x/net/html/atom" -) - -// A parser implements the HTML5 parsing algorithm: -// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction -type parser struct { - // tokenizer provides the tokens for the parser. - tokenizer *Tokenizer - // tok is the most recently read token. - tok Token - // Self-closing tags like
are treated as start tags, except that - // hasSelfClosingToken is set while they are being processed. - hasSelfClosingToken bool - // doc is the document root element. - doc *Node - // The stack of open elements (section 12.2.4.2) and active formatting - // elements (section 12.2.4.3). - oe, afe nodeStack - // Element pointers (section 12.2.4.4). - head, form *Node - // Other parsing state flags (section 12.2.4.5). - scripting, framesetOK bool - // The stack of template insertion modes - templateStack insertionModeStack - // im is the current insertion mode. - im insertionMode - // originalIM is the insertion mode to go back to after completing a text - // or inTableText insertion mode. - originalIM insertionMode - // fosterParenting is whether new elements should be inserted according to - // the foster parenting rules (section 12.2.6.1). - fosterParenting bool - // quirks is whether the parser is operating in "quirks mode." - quirks bool - // fragment is whether the parser is parsing an HTML fragment. - fragment bool - // context is the context element when parsing an HTML fragment - // (section 12.4). - context *Node -} - -func (p *parser) top() *Node { - if n := p.oe.top(); n != nil { - return n - } - return p.doc -} - -// Stop tags for use in popUntil. These come from section 12.2.4.2. -var ( - defaultScopeStopTags = map[string][]a.Atom{ - "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, - "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext}, - "svg": {a.Desc, a.ForeignObject, a.Title}, - } -) - -type scope int - -const ( - defaultScope scope = iota - listItemScope - buttonScope - tableScope - tableRowScope - tableBodyScope - selectScope -) - -// popUntil pops the stack of open elements at the highest element whose tag -// is in matchTags, provided there is no higher element in the scope's stop -// tags (as defined in section 12.2.4.2). It returns whether or not there was -// such an element. If there was not, popUntil leaves the stack unchanged. -// -// For example, the set of stop tags for table scope is: "html", "table". If -// the stack was: -// ["html", "body", "font", "table", "b", "i", "u"] -// then popUntil(tableScope, "font") would return false, but -// popUntil(tableScope, "i") would return true and the stack would become: -// ["html", "body", "font", "table", "b"] -// -// If an element's tag is in both the stop tags and matchTags, then the stack -// will be popped and the function returns true (provided, of course, there was -// no higher element in the stack that was also in the stop tags). For example, -// popUntil(tableScope, "table") returns true and leaves: -// ["html", "body", "font"] -func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { - if i := p.indexOfElementInScope(s, matchTags...); i != -1 { - p.oe = p.oe[:i] - return true - } - return false -} - -// indexOfElementInScope returns the index in p.oe of the highest element whose -// tag is in matchTags that is in scope. If no matching element is in scope, it -// returns -1. -func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { - for i := len(p.oe) - 1; i >= 0; i-- { - tagAtom := p.oe[i].DataAtom - if p.oe[i].Namespace == "" { - for _, t := range matchTags { - if t == tagAtom { - return i - } - } - switch s { - case defaultScope: - // No-op. - case listItemScope: - if tagAtom == a.Ol || tagAtom == a.Ul { - return -1 - } - case buttonScope: - if tagAtom == a.Button { - return -1 - } - case tableScope: - if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { - return -1 - } - case selectScope: - if tagAtom != a.Optgroup && tagAtom != a.Option { - return -1 - } - default: - panic("unreachable") - } - } - switch s { - case defaultScope, listItemScope, buttonScope: - for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { - if t == tagAtom { - return -1 - } - } - } - } - return -1 -} - -// elementInScope is like popUntil, except that it doesn't modify the stack of -// open elements. -func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { - return p.indexOfElementInScope(s, matchTags...) != -1 -} - -// clearStackToContext pops elements off the stack of open elements until a -// scope-defined element is found. -func (p *parser) clearStackToContext(s scope) { - for i := len(p.oe) - 1; i >= 0; i-- { - tagAtom := p.oe[i].DataAtom - switch s { - case tableScope: - if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template { - p.oe = p.oe[:i+1] - return - } - case tableRowScope: - if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template { - p.oe = p.oe[:i+1] - return - } - case tableBodyScope: - if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template { - p.oe = p.oe[:i+1] - return - } - default: - panic("unreachable") - } - } -} - -// parseGenericRawTextElements implements the generic raw text element parsing -// algorithm defined in 12.2.6.2. -// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text -// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part -// officially, need to make tokenizer consider both states. -func (p *parser) parseGenericRawTextElement() { - p.addElement() - p.originalIM = p.im - p.im = textIM -} - -// generateImpliedEndTags pops nodes off the stack of open elements as long as -// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. -// If exceptions are specified, nodes with that name will not be popped off. -func (p *parser) generateImpliedEndTags(exceptions ...string) { - var i int -loop: - for i = len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - if n.Type != ElementNode { - break - } - switch n.DataAtom { - case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc: - for _, except := range exceptions { - if n.Data == except { - break loop - } - } - continue - } - break - } - - p.oe = p.oe[:i+1] -} - -// addChild adds a child node n to the top element, and pushes n onto the stack -// of open elements if it is an element node. -func (p *parser) addChild(n *Node) { - if p.shouldFosterParent() { - p.fosterParent(n) - } else { - p.top().AppendChild(n) - } - - if n.Type == ElementNode { - p.oe = append(p.oe, n) - } -} - -// shouldFosterParent returns whether the next node to be added should be -// foster parented. -func (p *parser) shouldFosterParent() bool { - if p.fosterParenting { - switch p.top().DataAtom { - case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: - return true - } - } - return false -} - -// fosterParent adds a child node according to the foster parenting rules. -// Section 12.2.6.1, "foster parenting". -func (p *parser) fosterParent(n *Node) { - var table, parent, prev, template *Node - var i int - for i = len(p.oe) - 1; i >= 0; i-- { - if p.oe[i].DataAtom == a.Table { - table = p.oe[i] - break - } - } - - var j int - for j = len(p.oe) - 1; j >= 0; j-- { - if p.oe[j].DataAtom == a.Template { - template = p.oe[j] - break - } - } - - if template != nil && (table == nil || j > i) { - template.AppendChild(n) - return - } - - if table == nil { - // The foster parent is the html element. - parent = p.oe[0] - } else { - parent = table.Parent - } - if parent == nil { - parent = p.oe[i-1] - } - - if table != nil { - prev = table.PrevSibling - } else { - prev = parent.LastChild - } - if prev != nil && prev.Type == TextNode && n.Type == TextNode { - prev.Data += n.Data - return - } - - parent.InsertBefore(n, table) -} - -// addText adds text to the preceding node if it is a text node, or else it -// calls addChild with a new text node. -func (p *parser) addText(text string) { - if text == "" { - return - } - - if p.shouldFosterParent() { - p.fosterParent(&Node{ - Type: TextNode, - Data: text, - }) - return - } - - t := p.top() - if n := t.LastChild; n != nil && n.Type == TextNode { - n.Data += text - return - } - p.addChild(&Node{ - Type: TextNode, - Data: text, - }) -} - -// addElement adds a child element based on the current token. -func (p *parser) addElement() { - p.addChild(&Node{ - Type: ElementNode, - DataAtom: p.tok.DataAtom, - Data: p.tok.Data, - Attr: p.tok.Attr, - }) -} - -// Section 12.2.4.3. -func (p *parser) addFormattingElement() { - tagAtom, attr := p.tok.DataAtom, p.tok.Attr - p.addElement() - - // Implement the Noah's Ark clause, but with three per family instead of two. - identicalElements := 0 -findIdenticalElements: - for i := len(p.afe) - 1; i >= 0; i-- { - n := p.afe[i] - if n.Type == scopeMarkerNode { - break - } - if n.Type != ElementNode { - continue - } - if n.Namespace != "" { - continue - } - if n.DataAtom != tagAtom { - continue - } - if len(n.Attr) != len(attr) { - continue - } - compareAttributes: - for _, t0 := range n.Attr { - for _, t1 := range attr { - if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val { - // Found a match for this attribute, continue with the next attribute. - continue compareAttributes - } - } - // If we get here, there is no attribute that matches a. - // Therefore the element is not identical to the new one. - continue findIdenticalElements - } - - identicalElements++ - if identicalElements >= 3 { - p.afe.remove(n) - } - } - - p.afe = append(p.afe, p.top()) -} - -// Section 12.2.4.3. -func (p *parser) clearActiveFormattingElements() { - for { - if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode { - return - } - } -} - -// Section 12.2.4.3. -func (p *parser) reconstructActiveFormattingElements() { - n := p.afe.top() - if n == nil { - return - } - if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { - return - } - i := len(p.afe) - 1 - for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { - if i == 0 { - i = -1 - break - } - i-- - n = p.afe[i] - } - for { - i++ - clone := p.afe[i].clone() - p.addChild(clone) - p.afe[i] = clone - if i == len(p.afe)-1 { - break - } - } -} - -// Section 12.2.5. -func (p *parser) acknowledgeSelfClosingTag() { - p.hasSelfClosingToken = false -} - -// An insertion mode (section 12.2.4.1) is the state transition function from -// a particular state in the HTML5 parser's state machine. It updates the -// parser's fields depending on parser.tok (where ErrorToken means EOF). -// It returns whether the token was consumed. -type insertionMode func(*parser) bool - -// setOriginalIM sets the insertion mode to return to after completing a text or -// inTableText insertion mode. -// Section 12.2.4.1, "using the rules for". -func (p *parser) setOriginalIM() { - if p.originalIM != nil { - panic("html: bad parser state: originalIM was set twice") - } - p.originalIM = p.im -} - -// Section 12.2.4.1, "reset the insertion mode". -func (p *parser) resetInsertionMode() { - for i := len(p.oe) - 1; i >= 0; i-- { - n := p.oe[i] - last := i == 0 - if last && p.context != nil { - n = p.context - } - - switch n.DataAtom { - case a.Select: - if !last { - for ancestor, first := n, p.oe[0]; ancestor != first; { - ancestor = p.oe[p.oe.index(ancestor)-1] - switch ancestor.DataAtom { - case a.Template: - p.im = inSelectIM - return - case a.Table: - p.im = inSelectInTableIM - return - } - } - } - p.im = inSelectIM - case a.Td, a.Th: - // TODO: remove this divergence from the HTML5 spec. - // - // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 - p.im = inCellIM - case a.Tr: - p.im = inRowIM - case a.Tbody, a.Thead, a.Tfoot: - p.im = inTableBodyIM - case a.Caption: - p.im = inCaptionIM - case a.Colgroup: - p.im = inColumnGroupIM - case a.Table: - p.im = inTableIM - case a.Template: - // TODO: remove this divergence from the HTML5 spec. - if n.Namespace != "" { - continue - } - p.im = p.templateStack.top() - case a.Head: - // TODO: remove this divergence from the HTML5 spec. - // - // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668 - p.im = inHeadIM - case a.Body: - p.im = inBodyIM - case a.Frameset: - p.im = inFramesetIM - case a.Html: - if p.head == nil { - p.im = beforeHeadIM - } else { - p.im = afterHeadIM - } - default: - if last { - p.im = inBodyIM - return - } - continue - } - return - } -} - -const whitespace = " \t\r\n\f" - -// Section 12.2.6.4.1. -func initialIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) - if len(p.tok.Data) == 0 { - // It was all whitespace, so ignore it. - return true - } - case CommentToken: - p.doc.AppendChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - n, quirks := parseDoctype(p.tok.Data) - p.doc.AppendChild(n) - p.quirks = quirks - p.im = beforeHTMLIM - return true - } - p.quirks = true - p.im = beforeHTMLIM - return false -} - -// Section 12.2.6.4.2. -func beforeHTMLIM(p *parser) bool { - switch p.tok.Type { - case DoctypeToken: - // Ignore the token. - return true - case TextToken: - p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) - if len(p.tok.Data) == 0 { - // It was all whitespace, so ignore it. - return true - } - case StartTagToken: - if p.tok.DataAtom == a.Html { - p.addElement() - p.im = beforeHeadIM - return true - } - case EndTagToken: - switch p.tok.DataAtom { - case a.Head, a.Body, a.Html, a.Br: - p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) - return false - default: - // Ignore the token. - return true - } - case CommentToken: - p.doc.AppendChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - } - p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) - return false -} - -// Section 12.2.6.4.3. -func beforeHeadIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) - if len(p.tok.Data) == 0 { - // It was all whitespace, so ignore it. - return true - } - case StartTagToken: - switch p.tok.DataAtom { - case a.Head: - p.addElement() - p.head = p.top() - p.im = inHeadIM - return true - case a.Html: - return inBodyIM(p) - } - case EndTagToken: - switch p.tok.DataAtom { - case a.Head, a.Body, a.Html, a.Br: - p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) - return false - default: - // Ignore the token. - return true - } - case CommentToken: - p.addChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - // Ignore the token. - return true - } - - p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) - return false -} - -// Section 12.2.6.4.4. -func inHeadIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - s := strings.TrimLeft(p.tok.Data, whitespace) - if len(s) < len(p.tok.Data) { - // Add the initial whitespace to the current node. - p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) - if s == "" { - return true - } - p.tok.Data = s - } - case StartTagToken: - switch p.tok.DataAtom { - case a.Html: - return inBodyIM(p) - case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta: - p.addElement() - p.oe.pop() - p.acknowledgeSelfClosingTag() - return true - case a.Noscript: - if p.scripting { - p.parseGenericRawTextElement() - return true - } - p.addElement() - p.im = inHeadNoscriptIM - // Don't let the tokenizer go into raw text mode when scripting is disabled. - p.tokenizer.NextIsNotRawText() - return true - case a.Script, a.Title: - p.addElement() - p.setOriginalIM() - p.im = textIM - return true - case a.Noframes, a.Style: - p.parseGenericRawTextElement() - return true - case a.Head: - // Ignore the token. - return true - case a.Template: - // TODO: remove this divergence from the HTML5 spec. - // - // We don't handle all of the corner cases when mixing foreign - // content (i.e. or ) with tag. - case a.Template: - return inHeadIM(p) - default: - // Ignore the token. - return true - } - case CommentToken: - p.addChild(&Node{ - Type: CommentNode, - Data: p.tok.Data, - }) - return true - case DoctypeToken: - // Ignore the token. - return true - } - - p.parseImpliedToken(StartTagToken, a.Body, a.Body.String()) - p.framesetOK = true - return false -} - -// copyAttributes copies attributes of src not found on dst to dst. -func copyAttributes(dst *Node, src Token) { - if len(src.Attr) == 0 { - return - } - attr := map[string]string{} - for _, t := range dst.Attr { - attr[t.Key] = t.Val - } - for _, t := range src.Attr { - if _, ok := attr[t.Key]; !ok { - dst.Attr = append(dst.Attr, t) - attr[t.Key] = t.Val - } - } -} - -// Section 12.2.6.4.7. -func inBodyIM(p *parser) bool { - switch p.tok.Type { - case TextToken: - d := p.tok.Data - switch n := p.oe.top(); n.DataAtom { - case a.Pre, a.Listing: - if n.FirstChild == nil { - // Ignore a newline at the start of a
 block.
-				if d != "" && d[0] == '\r' {
-					d = d[1:]
-				}
-				if d != "" && d[0] == '\n' {
-					d = d[1:]
-				}
-			}
-		}
-		d = strings.Replace(d, "\x00", "", -1)
-		if d == "" {
-			return true
-		}
-		p.reconstructActiveFormattingElements()
-		p.addText(d)
-		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
-			// There were non-whitespace characters inserted.
-			p.framesetOK = false
-		}
-	case StartTagToken:
-		switch p.tok.DataAtom {
-		case a.Html:
-			if p.oe.contains(a.Template) {
-				return true
-			}
-			copyAttributes(p.oe[0], p.tok)
-		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
-			return inHeadIM(p)
-		case a.Body:
-			if p.oe.contains(a.Template) {
-				return true
-			}
-			if len(p.oe) >= 2 {
-				body := p.oe[1]
-				if body.Type == ElementNode && body.DataAtom == a.Body {
-					p.framesetOK = false
-					copyAttributes(body, p.tok)
-				}
-			}
-		case a.Frameset:
-			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
-				// Ignore the token.
-				return true
-			}
-			body := p.oe[1]
-			if body.Parent != nil {
-				body.Parent.RemoveChild(body)
-			}
-			p.oe = p.oe[:1]
-			p.addElement()
-			p.im = inFramesetIM
-			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-			p.popUntil(buttonScope, a.P)
-			switch n := p.top(); n.DataAtom {
-			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-				p.oe.pop()
-			}
-			p.addElement()
-		case a.Pre, a.Listing:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-			// The newline, if any, will be dealt with by the TextToken case.
-			p.framesetOK = false
-		case a.Form:
-			if p.form != nil && !p.oe.contains(a.Template) {
-				// Ignore the token
-				return true
-			}
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-			if !p.oe.contains(a.Template) {
-				p.form = p.top()
-			}
-		case a.Li:
-			p.framesetOK = false
-			for i := len(p.oe) - 1; i >= 0; i-- {
-				node := p.oe[i]
-				switch node.DataAtom {
-				case a.Li:
-					p.oe = p.oe[:i]
-				case a.Address, a.Div, a.P:
-					continue
-				default:
-					if !isSpecialElement(node) {
-						continue
-					}
-				}
-				break
-			}
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.Dd, a.Dt:
-			p.framesetOK = false
-			for i := len(p.oe) - 1; i >= 0; i-- {
-				node := p.oe[i]
-				switch node.DataAtom {
-				case a.Dd, a.Dt:
-					p.oe = p.oe[:i]
-				case a.Address, a.Div, a.P:
-					continue
-				default:
-					if !isSpecialElement(node) {
-						continue
-					}
-				}
-				break
-			}
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.Plaintext:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-		case a.Button:
-			p.popUntil(defaultScope, a.Button)
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.framesetOK = false
-		case a.A:
-			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
-				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
-					p.inBodyEndTagFormatting(a.A, "a")
-					p.oe.remove(n)
-					p.afe.remove(n)
-					break
-				}
-			}
-			p.reconstructActiveFormattingElements()
-			p.addFormattingElement()
-		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
-			p.reconstructActiveFormattingElements()
-			p.addFormattingElement()
-		case a.Nobr:
-			p.reconstructActiveFormattingElements()
-			if p.elementInScope(defaultScope, a.Nobr) {
-				p.inBodyEndTagFormatting(a.Nobr, "nobr")
-				p.reconstructActiveFormattingElements()
-			}
-			p.addFormattingElement()
-		case a.Applet, a.Marquee, a.Object:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.afe = append(p.afe, &scopeMarker)
-			p.framesetOK = false
-		case a.Table:
-			if !p.quirks {
-				p.popUntil(buttonScope, a.P)
-			}
-			p.addElement()
-			p.framesetOK = false
-			p.im = inTableIM
-			return true
-		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.oe.pop()
-			p.acknowledgeSelfClosingTag()
-			if p.tok.DataAtom == a.Input {
-				for _, t := range p.tok.Attr {
-					if t.Key == "type" {
-						if strings.ToLower(t.Val) == "hidden" {
-							// Skip setting framesetOK = false
-							return true
-						}
-					}
-				}
-			}
-			p.framesetOK = false
-		case a.Param, a.Source, a.Track:
-			p.addElement()
-			p.oe.pop()
-			p.acknowledgeSelfClosingTag()
-		case a.Hr:
-			p.popUntil(buttonScope, a.P)
-			p.addElement()
-			p.oe.pop()
-			p.acknowledgeSelfClosingTag()
-			p.framesetOK = false
-		case a.Image:
-			p.tok.DataAtom = a.Img
-			p.tok.Data = a.Img.String()
-			return false
-		case a.Textarea:
-			p.addElement()
-			p.setOriginalIM()
-			p.framesetOK = false
-			p.im = textIM
-		case a.Xmp:
-			p.popUntil(buttonScope, a.P)
-			p.reconstructActiveFormattingElements()
-			p.framesetOK = false
-			p.parseGenericRawTextElement()
-		case a.Iframe:
-			p.framesetOK = false
-			p.parseGenericRawTextElement()
-		case a.Noembed:
-			p.parseGenericRawTextElement()
-		case a.Noscript:
-			if p.scripting {
-				p.parseGenericRawTextElement()
-				return true
-			}
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			// Don't let the tokenizer go into raw text mode when scripting is disabled.
-			p.tokenizer.NextIsNotRawText()
-		case a.Select:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-			p.framesetOK = false
-			p.im = inSelectIM
-			return true
-		case a.Optgroup, a.Option:
-			if p.top().DataAtom == a.Option {
-				p.oe.pop()
-			}
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-		case a.Rb, a.Rtc:
-			if p.elementInScope(defaultScope, a.Ruby) {
-				p.generateImpliedEndTags()
-			}
-			p.addElement()
-		case a.Rp, a.Rt:
-			if p.elementInScope(defaultScope, a.Ruby) {
-				p.generateImpliedEndTags("rtc")
-			}
-			p.addElement()
-		case a.Math, a.Svg:
-			p.reconstructActiveFormattingElements()
-			if p.tok.DataAtom == a.Math {
-				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
-			} else {
-				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
-			}
-			adjustForeignAttributes(p.tok.Attr)
-			p.addElement()
-			p.top().Namespace = p.tok.Data
-			if p.hasSelfClosingToken {
-				p.oe.pop()
-				p.acknowledgeSelfClosingTag()
-			}
-			return true
-		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
-			// Ignore the token.
-		default:
-			p.reconstructActiveFormattingElements()
-			p.addElement()
-		}
-	case EndTagToken:
-		switch p.tok.DataAtom {
-		case a.Body:
-			if p.elementInScope(defaultScope, a.Body) {
-				p.im = afterBodyIM
-			}
-		case a.Html:
-			if p.elementInScope(defaultScope, a.Body) {
-				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
-				return false
-			}
-			return true
-		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
-			p.popUntil(defaultScope, p.tok.DataAtom)
-		case a.Form:
-			if p.oe.contains(a.Template) {
-				i := p.indexOfElementInScope(defaultScope, a.Form)
-				if i == -1 {
-					// Ignore the token.
-					return true
-				}
-				p.generateImpliedEndTags()
-				if p.oe[i].DataAtom != a.Form {
-					// Ignore the token.
-					return true
-				}
-				p.popUntil(defaultScope, a.Form)
-			} else {
-				node := p.form
-				p.form = nil
-				i := p.indexOfElementInScope(defaultScope, a.Form)
-				if node == nil || i == -1 || p.oe[i] != node {
-					// Ignore the token.
-					return true
-				}
-				p.generateImpliedEndTags()
-				p.oe.remove(node)
-			}
-		case a.P:
-			if !p.elementInScope(buttonScope, a.P) {
-				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
-			}
-			p.popUntil(buttonScope, a.P)
-		case a.Li:
-			p.popUntil(listItemScope, a.Li)
-		case a.Dd, a.Dt:
-			p.popUntil(defaultScope, p.tok.DataAtom)
-		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
-		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
-			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
-		case a.Applet, a.Marquee, a.Object:
-			if p.popUntil(defaultScope, p.tok.DataAtom) {
-				p.clearActiveFormattingElements()
-			}
-		case a.Br:
-			p.tok.Type = StartTagToken
-			return false
-		case a.Template:
-			return inHeadIM(p)
-		default:
-			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
-		}
-	case CommentToken:
-		p.addChild(&Node{
-			Type: CommentNode,
-			Data: p.tok.Data,
-		})
-	case ErrorToken:
-		// TODO: remove this divergence from the HTML5 spec.
-		if len(p.templateStack) > 0 {
-			p.im = inTemplateIM
-			return false
-		}
-		for _, e := range p.oe {
-			switch e.DataAtom {
-			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
-				a.Thead, a.Tr, a.Body, a.Html:
-			default:
-				return true
-			}
-		}
-	}
-
-	return true
-}
-
-func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
-	// This is the "adoption agency" algorithm, described at
-	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
-
-	// TODO: this is a fairly literal line-by-line translation of that algorithm.
-	// Once the code successfully parses the comprehensive test suite, we should
-	// refactor this code to be more idiomatic.
-
-	// Steps 1-2
-	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
-		p.oe.pop()
-		return
-	}
-
-	// Steps 3-5. The outer loop.
-	for i := 0; i < 8; i++ {
-		// Step 6. Find the formatting element.
-		var formattingElement *Node
-		for j := len(p.afe) - 1; j >= 0; j-- {
-			if p.afe[j].Type == scopeMarkerNode {
-				break
-			}
-			if p.afe[j].DataAtom == tagAtom {
-				formattingElement = p.afe[j]
-				break
-			}
-		}
-		if formattingElement == nil {
-			p.inBodyEndTagOther(tagAtom, tagName)
-			return
-		}
-
-		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
-		feIndex := p.oe.index(formattingElement)
-		if feIndex == -1 {
-			p.afe.remove(formattingElement)
-			return
-		}
-		// Step 8. Ignore the tag if formatting element is not in the scope.
-		if !p.elementInScope(defaultScope, tagAtom) {
-			// Ignore the tag.
-			return
-		}
-
-		// Step 9. This step is omitted because it's just a parse error but no need to return.
-
-		// Steps 10-11. Find the furthest block.
-		var furthestBlock *Node
-		for _, e := range p.oe[feIndex:] {
-			if isSpecialElement(e) {
-				furthestBlock = e
-				break
-			}
-		}
-		if furthestBlock == nil {
-			e := p.oe.pop()
-			for e != formattingElement {
-				e = p.oe.pop()
-			}
-			p.afe.remove(e)
-			return
-		}
-
-		// Steps 12-13. Find the common ancestor and bookmark node.
-		commonAncestor := p.oe[feIndex-1]
-		bookmark := p.afe.index(formattingElement)
-
-		// Step 14. The inner loop. Find the lastNode to reparent.
-		lastNode := furthestBlock
-		node := furthestBlock
-		x := p.oe.index(node)
-		// Step 14.1.
-		j := 0
-		for {
-			// Step 14.2.
-			j++
-			// Step. 14.3.
-			x--
-			node = p.oe[x]
-			// Step 14.4. Go to the next step if node is formatting element.
-			if node == formattingElement {
-				break
-			}
-			// Step 14.5. Remove node from the list of active formatting elements if
-			// inner loop counter is greater than three and node is in the list of
-			// active formatting elements.
-			if ni := p.afe.index(node); j > 3 && ni > -1 {
-				p.afe.remove(node)
-				// If any element of the list of active formatting elements is removed,
-				// we need to take care whether bookmark should be decremented or not.
-				// This is because the value of bookmark may exceed the size of the
-				// list by removing elements from the list.
-				if ni <= bookmark {
-					bookmark--
-				}
-				continue
-			}
-			// Step 14.6. Continue the next inner loop if node is not in the list of
-			// active formatting elements.
-			if p.afe.index(node) == -1 {
-				p.oe.remove(node)
-				continue
-			}
-			// Step 14.7.
-			clone := node.clone()
-			p.afe[p.afe.index(node)] = clone
-			p.oe[p.oe.index(node)] = clone
-			node = clone
-			// Step 14.8.
-			if lastNode == furthestBlock {
-				bookmark = p.afe.index(node) + 1
-			}
-			// Step 14.9.
-			if lastNode.Parent != nil {
-				lastNode.Parent.RemoveChild(lastNode)
-			}
-			node.AppendChild(lastNode)
-			// Step 14.10.
-			lastNode = node
-		}
-
-		// Step 15. Reparent lastNode to the common ancestor,
-		// or for misnested table nodes, to the foster parent.
-		if lastNode.Parent != nil {
-			lastNode.Parent.RemoveChild(lastNode)
-		}
-		switch commonAncestor.DataAtom {
-		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-			p.fosterParent(lastNode)
-		default:
-			commonAncestor.AppendChild(lastNode)
-		}
-
-		// Steps 16-18. Reparent nodes from the furthest block's children
-		// to a clone of the formatting element.
-		clone := formattingElement.clone()
-		reparentChildren(clone, furthestBlock)
-		furthestBlock.AppendChild(clone)
-
-		// Step 19. Fix up the list of active formatting elements.
-		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
-			// Move the bookmark with the rest of the list.
-			bookmark--
-		}
-		p.afe.remove(formattingElement)
-		p.afe.insert(bookmark, clone)
-
-		// Step 20. Fix up the stack of open elements.
-		p.oe.remove(formattingElement)
-		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
-	}
-}
-
-// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
-// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
-// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
-func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
-	for i := len(p.oe) - 1; i >= 0; i-- {
-		// Two element nodes have the same tag if they have the same Data (a
-		// string-typed field). As an optimization, for common HTML tags, each
-		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
-		// field), since integer comparison is faster than string comparison.
-		// Uncommon (custom) tags get a zero DataAtom.
-		//
-		// The if condition here is equivalent to (p.oe[i].Data == tagName).
-		if (p.oe[i].DataAtom == tagAtom) &&
-			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
-			p.oe = p.oe[:i]
-			break
-		}
-		if isSpecialElement(p.oe[i]) {
-			break
-		}
-	}
-}
-
-// Section 12.2.6.4.8.
-func textIM(p *parser) bool {
-	switch p.tok.Type {
-	case ErrorToken:
-		p.oe.pop()
-	case TextToken:
-		d := p.tok.Data
-		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
-			// Ignore a newline at the start of a