up go mod, go mod vendor & go mod tidy
This commit is contained in:
parent
4ae10f08b2
commit
ee716d3f7e
20
go.mod
20
go.mod
|
@ -8,14 +8,16 @@ require (
|
|||
github.com/Azure/go-ntlmssp v0.0.0-20180810175552-4a21cbd618b4 // indirect
|
||||
github.com/ChrisTrenkamp/goxpath v0.0.0-20170625215350-4fe035839290
|
||||
github.com/NaverCloudPlatform/ncloud-sdk-go v0.0.0-20180110055012-c2e73f942591
|
||||
github.com/PuerkitoBio/goquery v1.5.0 // indirect
|
||||
github.com/Telmate/proxmox-api-go v0.0.0-20190410200643-f08824d5082d
|
||||
github.com/abdullin/seq v0.0.0-20160510034733-d5467c17e7af // indirect
|
||||
github.com/aliyun/alibaba-cloud-sdk-go v0.0.0-20190418113227-25233c783f4e
|
||||
github.com/aliyun/aliyun-oss-go-sdk v0.0.0-20170113022742-e6dbea820a9f
|
||||
github.com/antchfx/htmlquery v1.0.0 // indirect
|
||||
github.com/antchfx/xmlquery v1.0.0 // indirect
|
||||
github.com/antchfx/xpath v0.0.0-20170728053731-b5c552e1acbd // indirect
|
||||
github.com/antchfx/xquery v0.0.0-20170730121040-eb8c3c172607 // indirect
|
||||
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6 // indirect
|
||||
github.com/apache/thrift v0.12.0 // indirect
|
||||
github.com/approvals/go-approval-tests v0.0.0-20160714161514-ad96e53bea43
|
||||
github.com/armon/go-radix v1.0.0 // indirect
|
||||
github.com/aws/aws-sdk-go v1.16.24
|
||||
|
@ -29,8 +31,13 @@ require (
|
|||
github.com/docker/docker v0.0.0-20180422163414-57142e89befe // indirect
|
||||
github.com/dylanmei/iso8601 v0.1.0 // indirect
|
||||
github.com/dylanmei/winrmtest v0.0.0-20170819153634-c2fbb09e6c08
|
||||
github.com/fatih/camelcase v1.0.0
|
||||
github.com/fatih/structtag v1.0.0
|
||||
github.com/go-ini/ini v1.25.4
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/gocolly/colly v1.2.0
|
||||
github.com/gofrs/flock v0.7.1
|
||||
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect
|
||||
github.com/google/go-cmp v0.2.0
|
||||
github.com/google/shlex v0.0.0-20150127133951-6f45313302b9
|
||||
github.com/google/uuid v1.0.0
|
||||
|
@ -59,6 +66,7 @@ require (
|
|||
github.com/json-iterator/go v1.1.6 // indirect
|
||||
github.com/jtolds/gls v4.2.1+incompatible // indirect
|
||||
github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1
|
||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
||||
github.com/klauspost/compress v0.0.0-20160131094358-f86d2e6d8a77 // indirect
|
||||
github.com/klauspost/cpuid v0.0.0-20160106104451-349c67577817 // indirect
|
||||
github.com/klauspost/crc32 v0.0.0-20160114101742-999f3125931f // indirect
|
||||
|
@ -66,7 +74,6 @@ require (
|
|||
github.com/kr/fs v0.0.0-20131111012553-2788f0dbd169 // indirect
|
||||
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 // indirect
|
||||
github.com/linode/linodego v0.7.1
|
||||
github.com/marstr/guid v0.0.0-20170427235115-8bdf7d1a087c // indirect
|
||||
github.com/masterzen/azure-sdk-for-go v0.0.0-20161014135628-ee4f0065d00c // indirect
|
||||
github.com/masterzen/simplexml v0.0.0-20190410153822-31eea3082786 // indirect
|
||||
github.com/masterzen/winrm v0.0.0-20180224160350-7e40f93ae939
|
||||
|
@ -81,7 +88,6 @@ require (
|
|||
github.com/mitchellh/panicwrap v0.0.0-20170106182340-fce601fe5557
|
||||
github.com/mitchellh/prefixedio v0.0.0-20151214002211-6e6954073784
|
||||
github.com/mitchellh/reflectwalk v1.0.0
|
||||
github.com/mna/pigeon v1.0.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.1 // indirect
|
||||
github.com/moul/anonuuid v0.0.0-20160222162117-609b752a95ef // indirect
|
||||
|
@ -89,7 +95,8 @@ require (
|
|||
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 // indirect
|
||||
github.com/nu7hatch/gouuid v0.0.0-20131221200532-179d4d0c4d8d // indirect
|
||||
github.com/olekukonko/tablewriter v0.0.0-20180105111133-96aac992fc8b
|
||||
github.com/openzipkin/zipkin-go v0.1.6 // indirect
|
||||
github.com/onsi/ginkgo v1.7.0 // indirect
|
||||
github.com/onsi/gomega v1.4.3 // indirect
|
||||
github.com/oracle/oci-go-sdk v1.8.0
|
||||
github.com/packer-community/winrmcp v0.0.0-20180921204643-0fd363d6159a
|
||||
github.com/pierrec/lz4 v2.0.5+incompatible
|
||||
|
@ -97,15 +104,17 @@ require (
|
|||
github.com/pkg/sftp v0.0.0-20160118190721-e84cc8c755ca
|
||||
github.com/posener/complete v1.1.1
|
||||
github.com/profitbricks/profitbricks-sdk-go v4.0.2+incompatible
|
||||
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 // indirect
|
||||
github.com/renstrom/fuzzysearch v0.0.0-20160331204855-2d205ac6ec17 // indirect
|
||||
github.com/rwtodd/Go.Sed v0.0.0-20170507045331-d6d5d585814e
|
||||
github.com/ryanuber/go-glob v0.0.0-20170128012129-256dc444b735 // indirect
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
||||
github.com/satori/go.uuid v1.2.0 // indirect
|
||||
github.com/scaleway/scaleway-cli v0.0.0-20180921094345-7b12c9699d70
|
||||
github.com/sirupsen/logrus v1.2.0 // indirect
|
||||
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d // indirect
|
||||
github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c // indirect
|
||||
github.com/stretchr/testify v1.3.0
|
||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea // indirect
|
||||
github.com/tencentcloud/tencentcloud-sdk-go v0.0.0-20181220135002-f1744d40d346
|
||||
github.com/ugorji/go v0.0.0-20151218193438-646ae4a518c1
|
||||
github.com/ulikunitz/xz v0.5.5
|
||||
|
@ -119,7 +128,6 @@ require (
|
|||
golang.org/x/sync v0.0.0-20190423024810-112230192c58
|
||||
golang.org/x/sys v0.0.0-20190425145619-16072639606e
|
||||
golang.org/x/text v0.3.1 // indirect
|
||||
golang.org/x/tools v0.0.0-20190530184349-ce1a3806b557 // indirect
|
||||
google.golang.org/api v0.4.0
|
||||
google.golang.org/grpc v1.20.1
|
||||
gopkg.in/h2non/gock.v1 v1.0.12 // indirect
|
||||
|
|
74
go.sum
74
go.sum
|
@ -3,8 +3,6 @@ cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
|
|||
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
cloud.google.com/go v0.36.0 h1:+aCSj7tOo2LODWVEuZDZeGCckdt6MlSF+X/rB3wUiS8=
|
||||
cloud.google.com/go v0.36.0/go.mod h1:RUoy9p/M4ge0HzT8L+SDZ8jg+Q6fth0CiBuhFJpSV40=
|
||||
contrib.go.opencensus.io/exporter/ocagent v0.4.12 h1:jGFvw3l57ViIVEPKKEUXPcLYIXJmQxLUh6ey1eJhwyc=
|
||||
contrib.go.opencensus.io/exporter/ocagent v0.4.12/go.mod h1:450APlNTSR6FrvC3CTRqYosuDstRB9un7SOx2k/9ckA=
|
||||
contrib.go.opencensus.io/exporter/ocagent v0.5.0 h1:TKXjQSRS0/cCDrP7KvkgU6SmILtF/yV2TOs/02K/WZQ=
|
||||
contrib.go.opencensus.io/exporter/ocagent v0.5.0/go.mod h1:ImxhfLRpxoYiSq891pBrLVhN+qmP8BTVvdH2YLs7Gl0=
|
||||
dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU=
|
||||
|
@ -14,16 +12,10 @@ dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D
|
|||
git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
|
||||
github.com/1and1/oneandone-cloudserver-sdk-go v1.0.1 h1:RMTyvS5bjvSWiUcfqfr/E2pxHEMrALvU+E12n6biymg=
|
||||
github.com/1and1/oneandone-cloudserver-sdk-go v1.0.1/go.mod h1:61apmbkVJH4kg+38ftT+/l0XxdUCVnHggqcOTqZRSEE=
|
||||
github.com/Azure/azure-sdk-for-go v27.3.0+incompatible h1:i+ROfG3CsZUPoVAnhK06T3R6PmBzKB9ds+lHBpN7Mzo=
|
||||
github.com/Azure/azure-sdk-for-go v27.3.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
|
||||
github.com/Azure/azure-sdk-for-go v30.0.0+incompatible h1:6o1Yzl7wTBYg+xw0pY4qnalaPmEQolubEEdepo1/kmI=
|
||||
github.com/Azure/azure-sdk-for-go v30.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
|
||||
github.com/Azure/go-autorest v10.12.0+incompatible h1:6YphwUK+oXbzvCc1fd5VrnxCekwzDkpA7gUEbci2MvI=
|
||||
github.com/Azure/go-autorest v10.12.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
|
||||
github.com/Azure/go-autorest v12.0.0+incompatible h1:N+VqClcomLGD/sHb3smbSYYtNMgKpVV3Cd5r5i8z6bQ=
|
||||
github.com/Azure/go-autorest v12.0.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
|
||||
github.com/Azure/go-autorest/tracing v0.1.0 h1:TRBxC5Pj/fIuh4Qob0ZpkggbfT8RC0SubHbpV3p4/Vc=
|
||||
github.com/Azure/go-autorest/tracing v0.1.0/go.mod h1:ROEEAFwXycQw7Sn3DXNtEedEvdeRAgDr0izn4z5Ij88=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20180810175552-4a21cbd618b4 h1:pSm8mp0T2OH2CPmPDPtwHPr3VAQaOwVF/JbllOPP4xA=
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20180810175552-4a21cbd618b4/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
|
@ -31,26 +23,29 @@ github.com/ChrisTrenkamp/goxpath v0.0.0-20170625215350-4fe035839290 h1:K9I21XUHN
|
|||
github.com/ChrisTrenkamp/goxpath v0.0.0-20170625215350-4fe035839290/go.mod h1:nuWgzSkT5PnyOd+272uUmV0dnAnAn42Mk7PiQC5VzN4=
|
||||
github.com/NaverCloudPlatform/ncloud-sdk-go v0.0.0-20180110055012-c2e73f942591 h1:/P9HCl71+Eh6vDbKNyRu+rpIIR70UCZWNOGexVV3e6k=
|
||||
github.com/NaverCloudPlatform/ncloud-sdk-go v0.0.0-20180110055012-c2e73f942591/go.mod h1:EHGzQGbwozJBj/4qj3WGrTJ0FqjgOTOxLQ0VNWvPn08=
|
||||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
||||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
||||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||
github.com/Telmate/proxmox-api-go v0.0.0-20190410200643-f08824d5082d h1:igrCnHheXb+lZ1bW9Ths8JZZIjh9D4Vi/49JqiHE+cI=
|
||||
github.com/Telmate/proxmox-api-go v0.0.0-20190410200643-f08824d5082d/go.mod h1:OGWyIMJ87/k/GCz8CGiWB2HOXsOVDM6Lpe/nFPkC4IQ=
|
||||
github.com/abdullin/seq v0.0.0-20160510034733-d5467c17e7af h1:DBNMBMuMiWYu0b+8KMJuWmfCkcxl09JwdlqwDZZ6U14=
|
||||
github.com/abdullin/seq v0.0.0-20160510034733-d5467c17e7af/go.mod h1:5Jv4cbFiHJMsVxt52+i0Ha45fjshj6wxYr1r19tB9bw=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/aliyun/alibaba-cloud-sdk-go v0.0.0-20190418113227-25233c783f4e h1:/8wOj52pewmIX/8d5eVO3t7Rr3astkBI/ruyg4WNqRo=
|
||||
github.com/aliyun/alibaba-cloud-sdk-go v0.0.0-20190418113227-25233c783f4e/go.mod h1:T9M45xf79ahXVelWoOBmH0y4aC1t5kXO5BxwyakgIGA=
|
||||
github.com/aliyun/aliyun-oss-go-sdk v0.0.0-20170113022742-e6dbea820a9f h1:jI4DIE5Vf4oRaHfthB0oRhU+yuYuoOTurDzwAlskP00=
|
||||
github.com/aliyun/aliyun-oss-go-sdk v0.0.0-20170113022742-e6dbea820a9f/go.mod h1:T/Aws4fEfogEE9v+HPhhw+CntffsBHJ8nXQCwKr0/g8=
|
||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
|
||||
github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA=
|
||||
github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
|
||||
github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM=
|
||||
github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk=
|
||||
github.com/antchfx/xpath v0.0.0-20170728053731-b5c552e1acbd h1:S3Fr6QnkpW9VRjiEY4psQHhhbbahASuNVj52YIce7lI=
|
||||
github.com/antchfx/xpath v0.0.0-20170728053731-b5c552e1acbd/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||
github.com/antchfx/xquery v0.0.0-20170730121040-eb8c3c172607 h1:BFFG6KP8ASFBg2ptWsJn8p8RDufBjBDKIxLU7BTYGOM=
|
||||
github.com/antchfx/xquery v0.0.0-20170730121040-eb8c3c172607/go.mod h1:LzD22aAzDP8/dyiCKFp31He4m2GPjl0AFyzDtZzUu9M=
|
||||
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6 h1:uZuxRZCz65cG1o6K/xUqImNcYKtmk9ylqaH0itMSvzA=
|
||||
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q=
|
||||
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
|
||||
github.com/approvals/go-approval-tests v0.0.0-20160714161514-ad96e53bea43 h1:ePCAQPf5tUc5IMcUvu6euhSGna7jzs7eiXtJXHig6Zc=
|
||||
github.com/approvals/go-approval-tests v0.0.0-20160714161514-ad96e53bea43/go.mod h1:S6puKjZ9ZeqUPBv2hEBnMZGcM2J6mOsDRQcmxkMAND0=
|
||||
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
|
||||
|
@ -97,11 +92,12 @@ github.com/dylanmei/iso8601 v0.1.0 h1:812NGQDBcqquTfH5Yeo7lwR0nzx/cKdsmf3qMjPURU
|
|||
github.com/dylanmei/iso8601 v0.1.0/go.mod h1:w9KhXSgIyROl1DefbMYIE7UVSIvELTbMrCfx+QkYnoQ=
|
||||
github.com/dylanmei/winrmtest v0.0.0-20170819153634-c2fbb09e6c08 h1:0bp6/GrNOrTDtSXe9YYGCwf8jp5Fb/b+4a6MTRm4qzY=
|
||||
github.com/dylanmei/winrmtest v0.0.0-20170819153634-c2fbb09e6c08/go.mod h1:VBVDFSBXCIW8JaHQpI8lldSKfYaLMzP9oyq6IJ4fhzY=
|
||||
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
|
||||
github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU=
|
||||
github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I=
|
||||
github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8=
|
||||
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
|
||||
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
|
||||
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
|
||||
github.com/fatih/structtag v1.0.0 h1:pTHj65+u3RKWYPSGaU290FpI/dXxTaHdVwVwbcPKmEc=
|
||||
github.com/fatih/structtag v1.0.0/go.mod h1:IKitwq45uXL/yqi5mYghiD3w9H6eTOvI9vnk8tXMphA=
|
||||
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
|
||||
github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV9I=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
|
@ -110,13 +106,13 @@ github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeME
|
|||
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
|
||||
github.com/go-ini/ini v1.25.4 h1:Mujh4R/dH6YL8bxuISne3xX2+qcQ9p0IxKAP6ExWoUo=
|
||||
github.com/go-ini/ini v1.25.4/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
|
||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
||||
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
||||
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
||||
github.com/gofrs/flock v0.7.1 h1:DP+LD/t0njgoPBvT5MJLeliUIVQR03hiKR6vezdwHlc=
|
||||
github.com/gofrs/flock v0.7.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
|
||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||
github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E=
|
||||
|
@ -153,8 +149,6 @@ github.com/gophercloud/utils v0.0.0-20190124192022-a5c25e7a53a6/go.mod h1:wjDF8z
|
|||
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e h1:JKmoR8x90Iww1ks85zJ1lfDGgIiMDuIptTOhJq+zKyg=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
|
||||
github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/websocket v0.0.0-20170319172727-a91eba7f9777 h1:JIM+OacoOJRU30xpjMf8sulYqjr0ViA3WDrTX6j/yDI=
|
||||
github.com/gorilla/websocket v0.0.0-20170319172727-a91eba7f9777/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ=
|
||||
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
||||
|
@ -226,9 +220,10 @@ github.com/json-iterator/go v1.1.6 h1:MrUvLMLTMxbqFJ9kzlvat/rYZqZnW3u4wkLzWTaFwK
|
|||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/jtolds/gls v4.2.1+incompatible h1:fSuqC+Gmlu6l/ZYAoZzx2pyucC8Xza35fpRVWLVmUEE=
|
||||
github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||
github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1 h1:PJPDf8OUfOK1bb/NeTKd4f1QXZItOX389VN3B6qC8ro=
|
||||
github.com/kardianos/osext v0.0.0-20170510131534-ae77be60afb1/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
|
||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v0.0.0-20160131094358-f86d2e6d8a77 h1:rJnR80lkojFgjdg/oQPhbZoY8t8uM51XMz8DrJrjabk=
|
||||
github.com/klauspost/compress v0.0.0-20160131094358-f86d2e6d8a77/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||
|
@ -242,7 +237,6 @@ github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGi
|
|||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/fs v0.0.0-20131111012553-2788f0dbd169 h1:YUrU1/jxRqnt0PSrKj1Uj/wEjk/fjnE80QFfi2Zlj7Q=
|
||||
github.com/kr/fs v0.0.0-20131111012553-2788f0dbd169/go.mod h1:glhvuHOU9Hy7/8PwwdtnarXqLagOX0b/TbZx2zLMqEg=
|
||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
|
@ -253,8 +247,6 @@ github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3v
|
|||
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
|
||||
github.com/linode/linodego v0.7.1 h1:4WZmMpSA2NRwlPZcc0+4Gyn7rr99Evk9bnr0B3gXRKE=
|
||||
github.com/linode/linodego v0.7.1/go.mod h1:ga11n3ivecUrPCHN0rANxKmfWBJVkOXfLMZinAbj2sY=
|
||||
github.com/marstr/guid v0.0.0-20170427235115-8bdf7d1a087c h1:N7uWGS2fTwH/4BwxbHiJZNAFTSJ5yPU0emHsQWvkxEY=
|
||||
github.com/marstr/guid v0.0.0-20170427235115-8bdf7d1a087c/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho=
|
||||
github.com/masterzen/azure-sdk-for-go v0.0.0-20161014135628-ee4f0065d00c h1:FMUOnVGy8nWk1cvlMCAoftRItQGMxI0vzJ3dQjeZTCE=
|
||||
github.com/masterzen/azure-sdk-for-go v0.0.0-20161014135628-ee4f0065d00c/go.mod h1:mf8fjOu33zCqxUjuiU3I8S1lJMyEAlH+0F2+M5xl3hE=
|
||||
github.com/masterzen/simplexml v0.0.0-20190410153822-31eea3082786 h1:2ZKn+w/BJeL43sCxI2jhPLRv73oVVOjEKZjKkflyqxg=
|
||||
|
@ -297,8 +289,6 @@ github.com/mitchellh/prefixedio v0.0.0-20151214002211-6e6954073784 h1:+DAetXqxv/
|
|||
github.com/mitchellh/prefixedio v0.0.0-20151214002211-6e6954073784/go.mod h1:kB1naBgV9ORnkiTVeyJOI1DavaJkG4oNIq0Af6ZVKUo=
|
||||
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
|
||||
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||
github.com/mna/pigeon v1.0.0 h1:n46IoStjdzjaXuyBH53j9HZ8CVqGWpC7P5/v8dP4qEY=
|
||||
github.com/mna/pigeon v1.0.0/go.mod h1:Iym28+kJVnC1hfQvv5MUtI6AiFFzvQjHcvI4RFTG/04=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
|
||||
|
@ -307,7 +297,6 @@ github.com/moul/anonuuid v0.0.0-20160222162117-609b752a95ef h1:E/seV1Rtsnr2juBw1
|
|||
github.com/moul/anonuuid v0.0.0-20160222162117-609b752a95ef/go.mod h1:LgKrp0Iss/BVwquptq4eIe6HPr0s3t1WHT5x0qOh14U=
|
||||
github.com/moul/gotty-client v0.0.0-20180327180212-b26a57ebc215 h1:y6FZWUBBt1iPmJyGbGza3ncvVBMKzgd32oFChRZR7Do=
|
||||
github.com/moul/gotty-client v0.0.0-20180327180212-b26a57ebc215/go.mod h1:CxM/JGtpRrEPve5H04IhxJrGhxgwxMc6jSP2T4YD60w=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32 h1:W6apQkHrMkS0Muv8G/TipAy/FJl/rCYT0+EuS8+Z0z4=
|
||||
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms=
|
||||
github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo=
|
||||
|
@ -322,7 +311,6 @@ github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
|
|||
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
|
||||
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8=
|
||||
github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw=
|
||||
github.com/oracle/oci-go-sdk v1.8.0 h1:4SO45bKV0I3/Mn1os3ANDZmV0eSE5z5CLdSUIkxtyzs=
|
||||
github.com/oracle/oci-go-sdk v1.8.0/go.mod h1:VQb79nF8Z2cwLkLS35ukwStZIg5F66tcBccjip/j888=
|
||||
github.com/packer-community/winrmcp v0.0.0-20180921204643-0fd363d6159a h1:A3QMuteviunoaY/8ex+RKFqwhcZJ/Cf3fCW3IwL2wx4=
|
||||
|
@ -342,16 +330,9 @@ github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndr
|
|||
github.com/profitbricks/profitbricks-sdk-go v4.0.2+incompatible h1:ZoVHH6voxW9Onzo6z2yLtocVoN6mBocyDoqoyAMHokE=
|
||||
github.com/profitbricks/profitbricks-sdk-go v4.0.2+incompatible/go.mod h1:T3/WrziK7fYH3C8ilAFAHe99R452/IzIG3YYkqaOFeQ=
|
||||
github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs=
|
||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/common v0.0.0-20180801064454-c7de2306084e/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro=
|
||||
github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
||||
github.com/prometheus/procfs v0.0.0-20180725123919-05ee40e3a273/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
|
||||
github.com/renstrom/fuzzysearch v0.0.0-20160331204855-2d205ac6ec17 h1:4qPms2txLWMLXKzqlnYSulKRS4cS9aYgPtAEpUelQok=
|
||||
github.com/renstrom/fuzzysearch v0.0.0-20160331204855-2d205ac6ec17/go.mod h1:SAEjPB4voP88qmWJXI7mA5m15uNlEnuHLx4Eu2mPGpQ=
|
||||
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
|
||||
|
@ -361,6 +342,8 @@ github.com/rwtodd/Go.Sed v0.0.0-20170507045331-d6d5d585814e/go.mod h1:8AEUvGVi2u
|
|||
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
|
||||
github.com/ryanuber/go-glob v0.0.0-20170128012129-256dc444b735 h1:7YvPJVmEeFHR1Tj9sZEYsmarJEQfMVYpd/Vyy/A8dqE=
|
||||
github.com/ryanuber/go-glob v0.0.0-20170128012129-256dc444b735/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
||||
github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
|
||||
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
|
||||
github.com/scaleway/scaleway-cli v0.0.0-20180921094345-7b12c9699d70 h1:DaqC32ZwOuO4ctgg9qAdKnlQxwFPkKmCOEqwSNwYy7c=
|
||||
|
@ -406,6 +389,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
|
|||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA=
|
||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI=
|
||||
github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04=
|
||||
github.com/tencentcloud/tencentcloud-sdk-go v0.0.0-20181220135002-f1744d40d346 h1:a014AaXz7AISMePv8xKRffUZZkr5z2XmSDf41gRV3+A=
|
||||
github.com/tencentcloud/tencentcloud-sdk-go v0.0.0-20181220135002-f1744d40d346/go.mod h1:0PfYow01SHPMhKY31xa+EFz2RStxIqj6JFAJS+IkCi4=
|
||||
github.com/ugorji/go v0.0.0-20151218193438-646ae4a518c1 h1:U6ufy3mLDgg9RYupntOvAF7xCmNNquyKaYaaVHo1Nnk=
|
||||
|
@ -422,10 +407,6 @@ github.com/yandex-cloud/go-sdk v0.0.0-20190402114215-3fc1d6947035 h1:2ZLZeg6xp+k
|
|||
github.com/yandex-cloud/go-sdk v0.0.0-20190402114215-3fc1d6947035/go.mod h1:Eml0jFLU4VVHgIN8zPHMuNwZXVzUMILyO6lQZSfz854=
|
||||
go.opencensus.io v0.18.0 h1:Mk5rgZcggtbvtAun5aJzAtjKKN/t0R3jJPlWILlv938=
|
||||
go.opencensus.io v0.18.0/go.mod h1:vKdFvxhtzZ9onBp9VKHK8z/sRpBMnKAsufL7wlDrCOA=
|
||||
go.opencensus.io v0.20.1 h1:pMEjRZ1M4ebWGikflH7nQpV6+Zr88KBMA2XJD3sbijw=
|
||||
go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
|
||||
go.opencensus.io v0.20.2 h1:NAfh7zF0/3/HqtMvJNZ/RFrSlCE6ZTlHmKfhL/Dm1Jk=
|
||||
go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
|
||||
go.opencensus.io v0.21.0 h1:mU6zScU4U1YAFPHEHYk+3JC4SY7JxgkqS10ZOSyksNg=
|
||||
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
||||
go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1yOyC1qaOBpL57BhE=
|
||||
|
@ -444,6 +425,7 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk
|
|||
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
|
||||
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
|
||||
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
|
@ -455,7 +437,6 @@ golang.org/x/net v0.0.0-20181201002055-351d144fa1fc h1:a3CU5tJYVj92DY2LaA1kUkrsq
|
|||
golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd h1:HuTn7WObtcDo9uEEU7rEqL0jYthdXAmZ6PP+meazmaU=
|
||||
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
|
||||
|
@ -487,8 +468,6 @@ golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5 h1:x6r4Jo0KNzOOzYd8lbcRsqjuq
|
|||
golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181213200352-4d1cda033e06/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
|
@ -512,14 +491,10 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm
|
|||
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
|
||||
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
||||
golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
||||
golang.org/x/tools v0.0.0-20190530184349-ce1a3806b557 h1:WFdP1eIY3AwGUPgVua5UIX4C7BzCIK8TOwm6RA+0vAQ=
|
||||
golang.org/x/tools v0.0.0-20190530184349-ce1a3806b557/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
|
||||
google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
|
||||
google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0=
|
||||
google.golang.org/api v0.1.0 h1:K6z2u68e86TPdSdefXdzvXgR1zEMa+459vBSfWYAZkI=
|
||||
google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y=
|
||||
google.golang.org/api v0.3.1 h1:oJra/lMfmtm13/rgY/8i3MzjFWYXvQIAKjQ3HqofMk8=
|
||||
google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk=
|
||||
google.golang.org/api v0.4.0 h1:KKgc1aqhV8wDPbDzlDtpvyjZFY3vjz85FP7p4wcQUyI=
|
||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
|
@ -540,11 +515,8 @@ google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9M
|
|||
google.golang.org/grpc v1.17.0 h1:TRJYBgMclJvGYn2rIMjj+h9KtMt5r1Ij7ODVRIZkwhk=
|
||||
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
|
||||
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.19.1 h1:TrBcJ1yqAl1G++wO39nD/qtgpsW9/1+QGrluyMGEYgM=
|
||||
google.golang.org/grpc v1.19.1/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
|
||||
google.golang.org/grpc v1.20.1 h1:Hz2g2wirWK7H0qIIhGIqRGTuMwTE8HEKFnDZZ7lm9NU=
|
||||
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
testdata/* linguist-vendored
|
|
@ -0,0 +1,16 @@
|
|||
# editor temporary files
|
||||
*.sublime-*
|
||||
.DS_Store
|
||||
*.swp
|
||||
#*.*#
|
||||
tags
|
||||
|
||||
# direnv config
|
||||
.env*
|
||||
|
||||
# test binaries
|
||||
*.test
|
||||
|
||||
# coverage and profilte outputs
|
||||
*.out
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.1
|
||||
- 1.2.x
|
||||
- 1.3.x
|
||||
- 1.4.x
|
||||
- 1.5.x
|
||||
- 1.6.x
|
||||
- 1.7.x
|
||||
- 1.8.x
|
||||
- 1.9.x
|
||||
- "1.10.x"
|
||||
- 1.11.x
|
||||
- tip
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
Copyright (c) 2012-2016, Martin Angers & Contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,179 @@
|
|||
# goquery - a little like that j-thing, only in Go
|
||||
[![build status](https://secure.travis-ci.org/PuerkitoBio/goquery.svg?branch=master)](http://travis-ci.org/PuerkitoBio/goquery) [![GoDoc](https://godoc.org/github.com/PuerkitoBio/goquery?status.png)](http://godoc.org/github.com/PuerkitoBio/goquery) [![Sourcegraph Badge](https://sourcegraph.com/github.com/PuerkitoBio/goquery/-/badge.svg)](https://sourcegraph.com/github.com/PuerkitoBio/goquery?badge)
|
||||
|
||||
goquery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on Go's [net/html package][html] and the CSS Selector library [cascadia][]. Since the net/html parser returns nodes, and not a full-featured DOM tree, jQuery's stateful manipulation functions (like height(), css(), detach()) have been left off.
|
||||
|
||||
Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. See the [wiki][] for various options to do this.
|
||||
|
||||
Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that writing a similar HTML-manipulating library was better to follow its API than to start anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...).
|
||||
|
||||
## Table of Contents
|
||||
|
||||
* [Installation](#installation)
|
||||
* [Changelog](#changelog)
|
||||
* [API](#api)
|
||||
* [Examples](#examples)
|
||||
* [Related Projects](#related-projects)
|
||||
* [Support](#support)
|
||||
* [License](#license)
|
||||
|
||||
## Installation
|
||||
|
||||
Please note that because of the net/html dependency, goquery requires Go1.1+.
|
||||
|
||||
$ go get github.com/PuerkitoBio/goquery
|
||||
|
||||
(optional) To run unit tests:
|
||||
|
||||
$ cd $GOPATH/src/github.com/PuerkitoBio/goquery
|
||||
$ go test
|
||||
|
||||
(optional) To run benchmarks (warning: it runs for a few minutes):
|
||||
|
||||
$ cd $GOPATH/src/github.com/PuerkitoBio/goquery
|
||||
$ go test -bench=".*"
|
||||
|
||||
## Changelog
|
||||
|
||||
**Note that goquery's API is now stable, and will not break.**
|
||||
|
||||
* **2018-11-15 (v1.5.0)** : Go module support (thanks @Zaba505).
|
||||
* **2018-06-07 (v1.4.1)** : Add `NewDocumentFromReader` examples.
|
||||
* **2018-03-24 (v1.4.0)** : Deprecate `NewDocument(url)` and `NewDocumentFromResponse(response)`.
|
||||
* **2018-01-28 (v1.3.0)** : Add `ToEnd` constant to `Slice` until the end of the selection (thanks to @davidjwilkins for raising the issue).
|
||||
* **2018-01-11 (v1.2.0)** : Add `AddBack*` and deprecate `AndSelf` (thanks to @davidjwilkins).
|
||||
* **2017-02-12 (v1.1.0)** : Add `SetHtml` and `SetText` (thanks to @glebtv).
|
||||
* **2016-12-29 (v1.0.2)** : Optimize allocations for `Selection.Text` (thanks to @radovskyb).
|
||||
* **2016-08-28 (v1.0.1)** : Optimize performance for large documents.
|
||||
* **2016-07-27 (v1.0.0)** : Tag version 1.0.0.
|
||||
* **2016-06-15** : Invalid selector strings internally compile to a `Matcher` implementation that never matches any node (instead of a panic). So for example, `doc.Find("~")` returns an empty `*Selection` object.
|
||||
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
|
||||
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
|
||||
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
|
||||
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
|
||||
* **2014-11-07** : Add manipulation functions (thanks to [Andrew Stone][thatguystone]) and `*Matcher` functions, that receive compiled cascadia selectors instead of selector strings, thus avoiding potential panics thrown by goquery via `cascadia.MustCompile` calls. This results in better performance (selectors can be compiled once and reused) and more idiomatic error handling (you can handle cascadia's compilation errors, instead of recovering from panics, which had been bugging me for a long time). Note that the actual type expected is a `Matcher` interface, that `cascadia.Selector` implements. Other matcher implementations could be used.
|
||||
* **2014-11-06** : Change import paths of net/html to golang.org/x/net/html (see https://groups.google.com/forum/#!topic/golang-nuts/eD8dh3T9yyA). Make sure to update your code to use the new import path too when you call goquery with `html.Node`s.
|
||||
* **v0.3.2** : Add `NewDocumentFromReader()` (thanks jweir) which allows creating a goquery document from an io.Reader.
|
||||
* **v0.3.1** : Add `NewDocumentFromResponse()` (thanks assassingj) which allows creating a goquery document from an http response.
|
||||
* **v0.3.0** : Add `EachWithBreak()` which allows to break out of an `Each()` loop by returning false. This function was added instead of changing the existing `Each()` to avoid breaking compatibility.
|
||||
* **v0.2.1** : Make go-getable, now that [go.net/html is Go1.0-compatible][gonet] (thanks to @matrixik for pointing this out).
|
||||
* **v0.2.0** : Add support for negative indices in Slice(). **BREAKING CHANGE** `Document.Root` is removed, `Document` is now a `Selection` itself (a selection of one, the root element, just like `Document.Root` was before). Add jQuery's Closest() method.
|
||||
* **v0.1.1** : Add benchmarks to use as baseline for refactorings, refactor Next...() and Prev...() methods to use the new html package's linked list features (Next/PrevSibling, FirstChild). Good performance boost (40+% in some cases).
|
||||
* **v0.1.0** : Initial release.
|
||||
|
||||
## API
|
||||
|
||||
goquery exposes two structs, `Document` and `Selection`, and the `Matcher` interface. Unlike jQuery, which is loaded as part of a DOM document, and thus acts on its containing document, goquery doesn't know which HTML document to act upon. So it needs to be told, and that's what the `Document` type is for. It holds the root document node as the initial Selection value to manipulate.
|
||||
|
||||
jQuery often has many variants for the same function (no argument, a selector string argument, a jQuery object argument, a DOM element argument, ...). Instead of exposing the same features in goquery as a single method with variadic empty interface arguments, statically-typed signatures are used following this naming convention:
|
||||
|
||||
* When the jQuery equivalent can be called with no argument, it has the same name as jQuery for the no argument signature (e.g.: `Prev()`), and the version with a selector string argument is called `XxxFiltered()` (e.g.: `PrevFiltered()`)
|
||||
* When the jQuery equivalent **requires** one argument, the same name as jQuery is used for the selector string version (e.g.: `Is()`)
|
||||
* The signatures accepting a jQuery object as argument are defined in goquery as `XxxSelection()` and take a `*Selection` object as argument (e.g.: `FilterSelection()`)
|
||||
* The signatures accepting a DOM element as argument in jQuery are defined in goquery as `XxxNodes()` and take a variadic argument of type `*html.Node` (e.g.: `FilterNodes()`)
|
||||
* The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`)
|
||||
* The goquery methods that can be called with a selector string have a corresponding version that take a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`)
|
||||
|
||||
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
|
||||
|
||||
The complete [godoc reference documentation can be found here][doc].
|
||||
|
||||
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Invalid selector strings compile to a `Matcher` that fails to match any node. Behaviour of the various functions that take a selector string as argument follows from that fact, e.g. (where `~` is an invalid selector string):
|
||||
|
||||
* `Find("~")` returns an empty selection because the selector string doesn't match anything.
|
||||
* `Add("~")` returns a new selection that holds the same nodes as the original selection, because it didn't add any node (selector string didn't match anything).
|
||||
* `ParentsFiltered("~")` returns an empty selection because the selector string doesn't match anything.
|
||||
* `ParentsUntil("~")` returns all parents of the selection because the selector string didn't match any element to stop before the top element.
|
||||
|
||||
## Examples
|
||||
|
||||
See some tips and tricks in the [wiki][].
|
||||
|
||||
Adapted from example_test.go:
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func ExampleScrape() {
|
||||
// Request the HTML page.
|
||||
res, err := http.Get("http://metalsucks.net")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
|
||||
}
|
||||
|
||||
// Load the HTML document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Find the review items
|
||||
doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
|
||||
// For each item found, get the band and title
|
||||
band := s.Find("a").Text()
|
||||
title := s.Find("i").Text()
|
||||
fmt.Printf("Review %d: %s - %s\n", i, band, title)
|
||||
})
|
||||
}
|
||||
|
||||
func main() {
|
||||
ExampleScrape()
|
||||
}
|
||||
```
|
||||
|
||||
## Related Projects
|
||||
|
||||
- [Goq][goq], an HTML deserialization and scraping library based on goquery and struct tags.
|
||||
- [andybalholm/cascadia][cascadia], the CSS selector library used by goquery.
|
||||
- [suntong/cascadia][cascadiacli], a command-line interface to the cascadia CSS selector library, useful to test selectors.
|
||||
- [asciimoo/colly](https://github.com/asciimoo/colly), a lightning fast and elegant Scraping Framework
|
||||
- [gnulnx/goperf](https://github.com/gnulnx/goperf), a website performance test tool that also fetches static assets.
|
||||
- [MontFerret/ferret](https://github.com/MontFerret/ferret), declarative web scraping.
|
||||
|
||||
## Support
|
||||
|
||||
There are a number of ways you can support the project:
|
||||
|
||||
* Use it, star it, build something with it, spread the word!
|
||||
- If you do build something open-source or otherwise publicly-visible, let me know so I can add it to the [Related Projects](#related-projects) section!
|
||||
* Raise issues to improve the project (note: doc typos and clarifications are issues too!)
|
||||
- Please search existing issues before opening a new one - it may have already been adressed.
|
||||
* Pull requests: please discuss new code in an issue first, unless the fix is really trivial.
|
||||
- Make sure new code is tested.
|
||||
- Be mindful of existing code - PRs that break existing code have a high probability of being declined, unless it fixes a serious issue.
|
||||
|
||||
If you desperately want to send money my way, I have a BuyMeACoffee.com page:
|
||||
|
||||
<a href="https://www.buymeacoffee.com/mna" target="_blank"><img src="https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png" alt="Buy Me A Coffee" style="height: 41px !important;width: 174px !important;box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;-webkit-box-shadow: 0px 3px 2px 0px rgba(190, 190, 190, 0.5) !important;" ></a>
|
||||
|
||||
## License
|
||||
|
||||
The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic].
|
||||
|
||||
[jquery]: http://jquery.com/
|
||||
[go]: http://golang.org/
|
||||
[cascadia]: https://github.com/andybalholm/cascadia
|
||||
[cascadiacli]: https://github.com/suntong/cascadia
|
||||
[bsd]: http://opensource.org/licenses/BSD-3-Clause
|
||||
[golic]: http://golang.org/LICENSE
|
||||
[caslic]: https://github.com/andybalholm/cascadia/blob/master/LICENSE
|
||||
[doc]: http://godoc.org/github.com/PuerkitoBio/goquery
|
||||
[index]: http://api.jquery.com/index/
|
||||
[gonet]: https://github.com/golang/net/
|
||||
[html]: http://godoc.org/golang.org/x/net/html
|
||||
[wiki]: https://github.com/PuerkitoBio/goquery/wiki/Tips-and-tricks
|
||||
[thatguystone]: https://github.com/thatguystone
|
||||
[piotr]: https://github.com/piotrkowalczuk
|
||||
[goq]: https://github.com/andrewstuart/goq
|
|
@ -0,0 +1,124 @@
|
|||
package goquery
|
||||
|
||||
import (
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
const (
|
||||
maxUint = ^uint(0)
|
||||
maxInt = int(maxUint >> 1)
|
||||
|
||||
// ToEnd is a special index value that can be used as end index in a call
|
||||
// to Slice so that all elements are selected until the end of the Selection.
|
||||
// It is equivalent to passing (*Selection).Length().
|
||||
ToEnd = maxInt
|
||||
)
|
||||
|
||||
// First reduces the set of matched elements to the first in the set.
|
||||
// It returns a new Selection object, and an empty Selection object if the
|
||||
// the selection is empty.
|
||||
func (s *Selection) First() *Selection {
|
||||
return s.Eq(0)
|
||||
}
|
||||
|
||||
// Last reduces the set of matched elements to the last in the set.
|
||||
// It returns a new Selection object, and an empty Selection object if
|
||||
// the selection is empty.
|
||||
func (s *Selection) Last() *Selection {
|
||||
return s.Eq(-1)
|
||||
}
|
||||
|
||||
// Eq reduces the set of matched elements to the one at the specified index.
|
||||
// If a negative index is given, it counts backwards starting at the end of the
|
||||
// set. It returns a new Selection object, and an empty Selection object if the
|
||||
// index is invalid.
|
||||
func (s *Selection) Eq(index int) *Selection {
|
||||
if index < 0 {
|
||||
index += len(s.Nodes)
|
||||
}
|
||||
|
||||
if index >= len(s.Nodes) || index < 0 {
|
||||
return newEmptySelection(s.document)
|
||||
}
|
||||
|
||||
return s.Slice(index, index+1)
|
||||
}
|
||||
|
||||
// Slice reduces the set of matched elements to a subset specified by a range
|
||||
// of indices. The start index is 0-based and indicates the index of the first
|
||||
// element to select. The end index is 0-based and indicates the index at which
|
||||
// the elements stop being selected (the end index is not selected).
|
||||
//
|
||||
// The indices may be negative, in which case they represent an offset from the
|
||||
// end of the selection.
|
||||
//
|
||||
// The special value ToEnd may be specified as end index, in which case all elements
|
||||
// until the end are selected. This works both for a positive and negative start
|
||||
// index.
|
||||
func (s *Selection) Slice(start, end int) *Selection {
|
||||
if start < 0 {
|
||||
start += len(s.Nodes)
|
||||
}
|
||||
if end == ToEnd {
|
||||
end = len(s.Nodes)
|
||||
} else if end < 0 {
|
||||
end += len(s.Nodes)
|
||||
}
|
||||
return pushStack(s, s.Nodes[start:end])
|
||||
}
|
||||
|
||||
// Get retrieves the underlying node at the specified index.
|
||||
// Get without parameter is not implemented, since the node array is available
|
||||
// on the Selection object.
|
||||
func (s *Selection) Get(index int) *html.Node {
|
||||
if index < 0 {
|
||||
index += len(s.Nodes) // Negative index gets from the end
|
||||
}
|
||||
return s.Nodes[index]
|
||||
}
|
||||
|
||||
// Index returns the position of the first element within the Selection object
|
||||
// relative to its sibling elements.
|
||||
func (s *Selection) Index() int {
|
||||
if len(s.Nodes) > 0 {
|
||||
return newSingleSelection(s.Nodes[0], s.document).PrevAll().Length()
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// IndexSelector returns the position of the first element within the
|
||||
// Selection object relative to the elements matched by the selector, or -1 if
|
||||
// not found.
|
||||
func (s *Selection) IndexSelector(selector string) int {
|
||||
if len(s.Nodes) > 0 {
|
||||
sel := s.document.Find(selector)
|
||||
return indexInSlice(sel.Nodes, s.Nodes[0])
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// IndexMatcher returns the position of the first element within the
|
||||
// Selection object relative to the elements matched by the matcher, or -1 if
|
||||
// not found.
|
||||
func (s *Selection) IndexMatcher(m Matcher) int {
|
||||
if len(s.Nodes) > 0 {
|
||||
sel := s.document.FindMatcher(m)
|
||||
return indexInSlice(sel.Nodes, s.Nodes[0])
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// IndexOfNode returns the position of the specified node within the Selection
|
||||
// object, or -1 if not found.
|
||||
func (s *Selection) IndexOfNode(node *html.Node) int {
|
||||
return indexInSlice(s.Nodes, node)
|
||||
}
|
||||
|
||||
// IndexOfSelection returns the position of the first node in the specified
|
||||
// Selection object within this Selection object, or -1 if not found.
|
||||
func (s *Selection) IndexOfSelection(sel *Selection) int {
|
||||
if sel != nil && len(sel.Nodes) > 0 {
|
||||
return indexInSlice(s.Nodes, sel.Nodes[0])
|
||||
}
|
||||
return -1
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
// Copyright (c) 2012-2016, Martin Angers & Contributors
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation and/or
|
||||
// other materials provided with the distribution.
|
||||
// * Neither the name of the author nor the names of its contributors may be used to
|
||||
// endorse or promote products derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
|
||||
// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
// AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
||||
// WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/*
|
||||
Package goquery implements features similar to jQuery, including the chainable
|
||||
syntax, to manipulate and query an HTML document.
|
||||
|
||||
It brings a syntax and a set of features similar to jQuery to the Go language.
|
||||
It is based on Go's net/html package and the CSS Selector library cascadia.
|
||||
Since the net/html parser returns nodes, and not a full-featured DOM
|
||||
tree, jQuery's stateful manipulation functions (like height(), css(), detach())
|
||||
have been left off.
|
||||
|
||||
Also, because the net/html parser requires UTF-8 encoding, so does goquery: it is
|
||||
the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML.
|
||||
See the repository's wiki for various options on how to do this.
|
||||
|
||||
Syntax-wise, it is as close as possible to jQuery, with the same method names when
|
||||
possible, and that warm and fuzzy chainable interface. jQuery being the
|
||||
ultra-popular library that it is, writing a similar HTML-manipulating
|
||||
library was better to follow its API than to start anew (in the same spirit as
|
||||
Go's fmt package), even though some of its methods are less than intuitive (looking
|
||||
at you, index()...).
|
||||
|
||||
It is hosted on GitHub, along with additional documentation in the README.md
|
||||
file: https://github.com/puerkitobio/goquery
|
||||
|
||||
Please note that because of the net/html dependency, goquery requires Go1.1+.
|
||||
|
||||
The various methods are split into files based on the category of behavior.
|
||||
The three dots (...) indicate that various "overloads" are available.
|
||||
|
||||
* array.go : array-like positional manipulation of the selection.
|
||||
- Eq()
|
||||
- First()
|
||||
- Get()
|
||||
- Index...()
|
||||
- Last()
|
||||
- Slice()
|
||||
|
||||
* expand.go : methods that expand or augment the selection's set.
|
||||
- Add...()
|
||||
- AndSelf()
|
||||
- Union(), which is an alias for AddSelection()
|
||||
|
||||
* filter.go : filtering methods, that reduce the selection's set.
|
||||
- End()
|
||||
- Filter...()
|
||||
- Has...()
|
||||
- Intersection(), which is an alias of FilterSelection()
|
||||
- Not...()
|
||||
|
||||
* iteration.go : methods to loop over the selection's nodes.
|
||||
- Each()
|
||||
- EachWithBreak()
|
||||
- Map()
|
||||
|
||||
* manipulation.go : methods for modifying the document
|
||||
- After...()
|
||||
- Append...()
|
||||
- Before...()
|
||||
- Clone()
|
||||
- Empty()
|
||||
- Prepend...()
|
||||
- Remove...()
|
||||
- ReplaceWith...()
|
||||
- Unwrap()
|
||||
- Wrap...()
|
||||
- WrapAll...()
|
||||
- WrapInner...()
|
||||
|
||||
* property.go : methods that inspect and get the node's properties values.
|
||||
- Attr*(), RemoveAttr(), SetAttr()
|
||||
- AddClass(), HasClass(), RemoveClass(), ToggleClass()
|
||||
- Html()
|
||||
- Length()
|
||||
- Size(), which is an alias for Length()
|
||||
- Text()
|
||||
|
||||
* query.go : methods that query, or reflect, a node's identity.
|
||||
- Contains()
|
||||
- Is...()
|
||||
|
||||
* traversal.go : methods to traverse the HTML document tree.
|
||||
- Children...()
|
||||
- Contents()
|
||||
- Find...()
|
||||
- Next...()
|
||||
- Parent[s]...()
|
||||
- Prev...()
|
||||
- Siblings...()
|
||||
|
||||
* type.go : definition of the types exposed by goquery.
|
||||
- Document
|
||||
- Selection
|
||||
- Matcher
|
||||
|
||||
* utilities.go : definition of helper functions (and not methods on a *Selection)
|
||||
that are not part of jQuery, but are useful to goquery.
|
||||
- NodeName
|
||||
- OuterHtml
|
||||
*/
|
||||
package goquery
|
|
@ -0,0 +1,70 @@
|
|||
package goquery
|
||||
|
||||
import "golang.org/x/net/html"
|
||||
|
||||
// Add adds the selector string's matching nodes to those in the current
|
||||
// selection and returns a new Selection object.
|
||||
// The selector string is run in the context of the document of the current
|
||||
// Selection object.
|
||||
func (s *Selection) Add(selector string) *Selection {
|
||||
return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, compileMatcher(selector))...)
|
||||
}
|
||||
|
||||
// AddMatcher adds the matcher's matching nodes to those in the current
|
||||
// selection and returns a new Selection object.
|
||||
// The matcher is run in the context of the document of the current
|
||||
// Selection object.
|
||||
func (s *Selection) AddMatcher(m Matcher) *Selection {
|
||||
return s.AddNodes(findWithMatcher([]*html.Node{s.document.rootNode}, m)...)
|
||||
}
|
||||
|
||||
// AddSelection adds the specified Selection object's nodes to those in the
|
||||
// current selection and returns a new Selection object.
|
||||
func (s *Selection) AddSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.AddNodes()
|
||||
}
|
||||
return s.AddNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// Union is an alias for AddSelection.
|
||||
func (s *Selection) Union(sel *Selection) *Selection {
|
||||
return s.AddSelection(sel)
|
||||
}
|
||||
|
||||
// AddNodes adds the specified nodes to those in the
|
||||
// current selection and returns a new Selection object.
|
||||
func (s *Selection) AddNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, appendWithoutDuplicates(s.Nodes, nodes, nil))
|
||||
}
|
||||
|
||||
// AndSelf adds the previous set of elements on the stack to the current set.
|
||||
// It returns a new Selection object containing the current Selection combined
|
||||
// with the previous one.
|
||||
// Deprecated: This function has been deprecated and is now an alias for AddBack().
|
||||
func (s *Selection) AndSelf() *Selection {
|
||||
return s.AddBack()
|
||||
}
|
||||
|
||||
// AddBack adds the previous set of elements on the stack to the current set.
|
||||
// It returns a new Selection object containing the current Selection combined
|
||||
// with the previous one.
|
||||
func (s *Selection) AddBack() *Selection {
|
||||
return s.AddSelection(s.prevSel)
|
||||
}
|
||||
|
||||
// AddBackFiltered reduces the previous set of elements on the stack to those that
|
||||
// match the selector string, and adds them to the current set.
|
||||
// It returns a new Selection object containing the current Selection combined
|
||||
// with the filtered previous one
|
||||
func (s *Selection) AddBackFiltered(selector string) *Selection {
|
||||
return s.AddSelection(s.prevSel.Filter(selector))
|
||||
}
|
||||
|
||||
// AddBackMatcher reduces the previous set of elements on the stack to those that match
|
||||
// the mateher, and adds them to the curernt set.
|
||||
// It returns a new Selection object containing the current Selection combined
|
||||
// with the filtered previous one
|
||||
func (s *Selection) AddBackMatcher(m Matcher) *Selection {
|
||||
return s.AddSelection(s.prevSel.FilterMatcher(m))
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
package goquery
|
||||
|
||||
import "golang.org/x/net/html"
|
||||
|
||||
// Filter reduces the set of matched elements to those that match the selector string.
|
||||
// It returns a new Selection object for this subset of matching elements.
|
||||
func (s *Selection) Filter(selector string) *Selection {
|
||||
return s.FilterMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// FilterMatcher reduces the set of matched elements to those that match
|
||||
// the given matcher. It returns a new Selection object for this subset
|
||||
// of matching elements.
|
||||
func (s *Selection) FilterMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, winnow(s, m, true))
|
||||
}
|
||||
|
||||
// Not removes elements from the Selection that match the selector string.
|
||||
// It returns a new Selection object with the matching elements removed.
|
||||
func (s *Selection) Not(selector string) *Selection {
|
||||
return s.NotMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// NotMatcher removes elements from the Selection that match the given matcher.
|
||||
// It returns a new Selection object with the matching elements removed.
|
||||
func (s *Selection) NotMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, winnow(s, m, false))
|
||||
}
|
||||
|
||||
// FilterFunction reduces the set of matched elements to those that pass the function's test.
|
||||
// It returns a new Selection object for this subset of elements.
|
||||
func (s *Selection) FilterFunction(f func(int, *Selection) bool) *Selection {
|
||||
return pushStack(s, winnowFunction(s, f, true))
|
||||
}
|
||||
|
||||
// NotFunction removes elements from the Selection that pass the function's test.
|
||||
// It returns a new Selection object with the matching elements removed.
|
||||
func (s *Selection) NotFunction(f func(int, *Selection) bool) *Selection {
|
||||
return pushStack(s, winnowFunction(s, f, false))
|
||||
}
|
||||
|
||||
// FilterNodes reduces the set of matched elements to those that match the specified nodes.
|
||||
// It returns a new Selection object for this subset of elements.
|
||||
func (s *Selection) FilterNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, winnowNodes(s, nodes, true))
|
||||
}
|
||||
|
||||
// NotNodes removes elements from the Selection that match the specified nodes.
|
||||
// It returns a new Selection object with the matching elements removed.
|
||||
func (s *Selection) NotNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, winnowNodes(s, nodes, false))
|
||||
}
|
||||
|
||||
// FilterSelection reduces the set of matched elements to those that match a
|
||||
// node in the specified Selection object.
|
||||
// It returns a new Selection object for this subset of elements.
|
||||
func (s *Selection) FilterSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return pushStack(s, winnowNodes(s, nil, true))
|
||||
}
|
||||
return pushStack(s, winnowNodes(s, sel.Nodes, true))
|
||||
}
|
||||
|
||||
// NotSelection removes elements from the Selection that match a node in the specified
|
||||
// Selection object. It returns a new Selection object with the matching elements removed.
|
||||
func (s *Selection) NotSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return pushStack(s, winnowNodes(s, nil, false))
|
||||
}
|
||||
return pushStack(s, winnowNodes(s, sel.Nodes, false))
|
||||
}
|
||||
|
||||
// Intersection is an alias for FilterSelection.
|
||||
func (s *Selection) Intersection(sel *Selection) *Selection {
|
||||
return s.FilterSelection(sel)
|
||||
}
|
||||
|
||||
// Has reduces the set of matched elements to those that have a descendant
|
||||
// that matches the selector.
|
||||
// It returns a new Selection object with the matching elements.
|
||||
func (s *Selection) Has(selector string) *Selection {
|
||||
return s.HasSelection(s.document.Find(selector))
|
||||
}
|
||||
|
||||
// HasMatcher reduces the set of matched elements to those that have a descendant
|
||||
// that matches the matcher.
|
||||
// It returns a new Selection object with the matching elements.
|
||||
func (s *Selection) HasMatcher(m Matcher) *Selection {
|
||||
return s.HasSelection(s.document.FindMatcher(m))
|
||||
}
|
||||
|
||||
// HasNodes reduces the set of matched elements to those that have a
|
||||
// descendant that matches one of the nodes.
|
||||
// It returns a new Selection object with the matching elements.
|
||||
func (s *Selection) HasNodes(nodes ...*html.Node) *Selection {
|
||||
return s.FilterFunction(func(_ int, sel *Selection) bool {
|
||||
// Add all nodes that contain one of the specified nodes
|
||||
for _, n := range nodes {
|
||||
if sel.Contains(n) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
})
|
||||
}
|
||||
|
||||
// HasSelection reduces the set of matched elements to those that have a
|
||||
// descendant that matches one of the nodes of the specified Selection object.
|
||||
// It returns a new Selection object with the matching elements.
|
||||
func (s *Selection) HasSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.HasNodes()
|
||||
}
|
||||
return s.HasNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// End ends the most recent filtering operation in the current chain and
|
||||
// returns the set of matched elements to its previous state.
|
||||
func (s *Selection) End() *Selection {
|
||||
if s.prevSel != nil {
|
||||
return s.prevSel
|
||||
}
|
||||
return newEmptySelection(s.document)
|
||||
}
|
||||
|
||||
// Filter based on the matcher, and the indicator to keep (Filter) or
|
||||
// to get rid of (Not) the matching elements.
|
||||
func winnow(sel *Selection, m Matcher, keep bool) []*html.Node {
|
||||
// Optimize if keep is requested
|
||||
if keep {
|
||||
return m.Filter(sel.Nodes)
|
||||
}
|
||||
// Use grep
|
||||
return grep(sel, func(i int, s *Selection) bool {
|
||||
return !m.Match(s.Get(0))
|
||||
})
|
||||
}
|
||||
|
||||
// Filter based on an array of nodes, and the indicator to keep (Filter) or
|
||||
// to get rid of (Not) the matching elements.
|
||||
func winnowNodes(sel *Selection, nodes []*html.Node, keep bool) []*html.Node {
|
||||
if len(nodes)+len(sel.Nodes) < minNodesForSet {
|
||||
return grep(sel, func(i int, s *Selection) bool {
|
||||
return isInSlice(nodes, s.Get(0)) == keep
|
||||
})
|
||||
}
|
||||
|
||||
set := make(map[*html.Node]bool)
|
||||
for _, n := range nodes {
|
||||
set[n] = true
|
||||
}
|
||||
return grep(sel, func(i int, s *Selection) bool {
|
||||
return set[s.Get(0)] == keep
|
||||
})
|
||||
}
|
||||
|
||||
// Filter based on a function test, and the indicator to keep (Filter) or
|
||||
// to get rid of (Not) the matching elements.
|
||||
func winnowFunction(sel *Selection, f func(int, *Selection) bool, keep bool) []*html.Node {
|
||||
return grep(sel, func(i int, s *Selection) bool {
|
||||
return f(i, s) == keep
|
||||
})
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
module github.com/PuerkitoBio/goquery
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.0.0
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a
|
||||
)
|
|
@ -0,0 +1,5 @@
|
|||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
|
@ -0,0 +1,39 @@
|
|||
package goquery
|
||||
|
||||
// Each iterates over a Selection object, executing a function for each
|
||||
// matched element. It returns the current Selection object. The function
|
||||
// f is called for each element in the selection with the index of the
|
||||
// element in that selection starting at 0, and a *Selection that contains
|
||||
// only that element.
|
||||
func (s *Selection) Each(f func(int, *Selection)) *Selection {
|
||||
for i, n := range s.Nodes {
|
||||
f(i, newSingleSelection(n, s.document))
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// EachWithBreak iterates over a Selection object, executing a function for each
|
||||
// matched element. It is identical to Each except that it is possible to break
|
||||
// out of the loop by returning false in the callback function. It returns the
|
||||
// current Selection object.
|
||||
func (s *Selection) EachWithBreak(f func(int, *Selection) bool) *Selection {
|
||||
for i, n := range s.Nodes {
|
||||
if !f(i, newSingleSelection(n, s.document)) {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Map passes each element in the current matched set through a function,
|
||||
// producing a slice of string holding the returned values. The function
|
||||
// f is called for each element in the selection with the index of the
|
||||
// element in that selection starting at 0, and a *Selection that contains
|
||||
// only that element.
|
||||
func (s *Selection) Map(f func(int, *Selection) string) (result []string) {
|
||||
for i, n := range s.Nodes {
|
||||
result = append(result, f(i, newSingleSelection(n, s.document)))
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
|
@ -0,0 +1,574 @@
|
|||
package goquery
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// After applies the selector from the root document and inserts the matched elements
|
||||
// after the elements in the set of matched elements.
|
||||
//
|
||||
// If one of the matched elements in the selection is not currently in the
|
||||
// document, it's impossible to insert nodes after it, so it will be ignored.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) After(selector string) *Selection {
|
||||
return s.AfterMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// AfterMatcher applies the matcher from the root document and inserts the matched elements
|
||||
// after the elements in the set of matched elements.
|
||||
//
|
||||
// If one of the matched elements in the selection is not currently in the
|
||||
// document, it's impossible to insert nodes after it, so it will be ignored.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AfterMatcher(m Matcher) *Selection {
|
||||
return s.AfterNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// AfterSelection inserts the elements in the selection after each element in the set of matched
|
||||
// elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AfterSelection(sel *Selection) *Selection {
|
||||
return s.AfterNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// AfterHtml parses the html and inserts it after the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AfterHtml(html string) *Selection {
|
||||
return s.AfterNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// AfterNodes inserts the nodes after each element in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AfterNodes(ns ...*html.Node) *Selection {
|
||||
return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
|
||||
if sn.Parent != nil {
|
||||
sn.Parent.InsertBefore(n, sn.NextSibling)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Append appends the elements specified by the selector to the end of each element
|
||||
// in the set of matched elements, following those rules:
|
||||
//
|
||||
// 1) The selector is applied to the root document.
|
||||
//
|
||||
// 2) Elements that are part of the document will be moved to the new location.
|
||||
//
|
||||
// 3) If there are multiple locations to append to, cloned nodes will be
|
||||
// appended to all target locations except the last one, which will be moved
|
||||
// as noted in (2).
|
||||
func (s *Selection) Append(selector string) *Selection {
|
||||
return s.AppendMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// AppendMatcher appends the elements specified by the matcher to the end of each element
|
||||
// in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AppendMatcher(m Matcher) *Selection {
|
||||
return s.AppendNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// AppendSelection appends the elements in the selection to the end of each element
|
||||
// in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AppendSelection(sel *Selection) *Selection {
|
||||
return s.AppendNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// AppendHtml parses the html and appends it to the set of matched elements.
|
||||
func (s *Selection) AppendHtml(html string) *Selection {
|
||||
return s.AppendNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// AppendNodes appends the specified nodes to each node in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) AppendNodes(ns ...*html.Node) *Selection {
|
||||
return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
|
||||
sn.AppendChild(n)
|
||||
})
|
||||
}
|
||||
|
||||
// Before inserts the matched elements before each element in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) Before(selector string) *Selection {
|
||||
return s.BeforeMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// BeforeMatcher inserts the matched elements before each element in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) BeforeMatcher(m Matcher) *Selection {
|
||||
return s.BeforeNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// BeforeSelection inserts the elements in the selection before each element in the set of matched
|
||||
// elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) BeforeSelection(sel *Selection) *Selection {
|
||||
return s.BeforeNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// BeforeHtml parses the html and inserts it before the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) BeforeHtml(html string) *Selection {
|
||||
return s.BeforeNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// BeforeNodes inserts the nodes before each element in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) BeforeNodes(ns ...*html.Node) *Selection {
|
||||
return s.manipulateNodes(ns, false, func(sn *html.Node, n *html.Node) {
|
||||
if sn.Parent != nil {
|
||||
sn.Parent.InsertBefore(n, sn)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Clone creates a deep copy of the set of matched nodes. The new nodes will not be
|
||||
// attached to the document.
|
||||
func (s *Selection) Clone() *Selection {
|
||||
ns := newEmptySelection(s.document)
|
||||
ns.Nodes = cloneNodes(s.Nodes)
|
||||
return ns
|
||||
}
|
||||
|
||||
// Empty removes all children nodes from the set of matched elements.
|
||||
// It returns the children nodes in a new Selection.
|
||||
func (s *Selection) Empty() *Selection {
|
||||
var nodes []*html.Node
|
||||
|
||||
for _, n := range s.Nodes {
|
||||
for c := n.FirstChild; c != nil; c = n.FirstChild {
|
||||
n.RemoveChild(c)
|
||||
nodes = append(nodes, c)
|
||||
}
|
||||
}
|
||||
|
||||
return pushStack(s, nodes)
|
||||
}
|
||||
|
||||
// Prepend prepends the elements specified by the selector to each element in
|
||||
// the set of matched elements, following the same rules as Append.
|
||||
func (s *Selection) Prepend(selector string) *Selection {
|
||||
return s.PrependMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// PrependMatcher prepends the elements specified by the matcher to each
|
||||
// element in the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) PrependMatcher(m Matcher) *Selection {
|
||||
return s.PrependNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// PrependSelection prepends the elements in the selection to each element in
|
||||
// the set of matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) PrependSelection(sel *Selection) *Selection {
|
||||
return s.PrependNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// PrependHtml parses the html and prepends it to the set of matched elements.
|
||||
func (s *Selection) PrependHtml(html string) *Selection {
|
||||
return s.PrependNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// PrependNodes prepends the specified nodes to each node in the set of
|
||||
// matched elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) PrependNodes(ns ...*html.Node) *Selection {
|
||||
return s.manipulateNodes(ns, true, func(sn *html.Node, n *html.Node) {
|
||||
// sn.FirstChild may be nil, in which case this functions like
|
||||
// sn.AppendChild()
|
||||
sn.InsertBefore(n, sn.FirstChild)
|
||||
})
|
||||
}
|
||||
|
||||
// Remove removes the set of matched elements from the document.
|
||||
// It returns the same selection, now consisting of nodes not in the document.
|
||||
func (s *Selection) Remove() *Selection {
|
||||
for _, n := range s.Nodes {
|
||||
if n.Parent != nil {
|
||||
n.Parent.RemoveChild(n)
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// RemoveFiltered removes the set of matched elements by selector.
|
||||
// It returns the Selection of removed nodes.
|
||||
func (s *Selection) RemoveFiltered(selector string) *Selection {
|
||||
return s.RemoveMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// RemoveMatcher removes the set of matched elements.
|
||||
// It returns the Selection of removed nodes.
|
||||
func (s *Selection) RemoveMatcher(m Matcher) *Selection {
|
||||
return s.FilterMatcher(m).Remove()
|
||||
}
|
||||
|
||||
// ReplaceWith replaces each element in the set of matched elements with the
|
||||
// nodes matched by the given selector.
|
||||
// It returns the removed elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) ReplaceWith(selector string) *Selection {
|
||||
return s.ReplaceWithMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// ReplaceWithMatcher replaces each element in the set of matched elements with
|
||||
// the nodes matched by the given Matcher.
|
||||
// It returns the removed elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) ReplaceWithMatcher(m Matcher) *Selection {
|
||||
return s.ReplaceWithNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// ReplaceWithSelection replaces each element in the set of matched elements with
|
||||
// the nodes from the given Selection.
|
||||
// It returns the removed elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) ReplaceWithSelection(sel *Selection) *Selection {
|
||||
return s.ReplaceWithNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// ReplaceWithHtml replaces each element in the set of matched elements with
|
||||
// the parsed HTML.
|
||||
// It returns the removed elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) ReplaceWithHtml(html string) *Selection {
|
||||
return s.ReplaceWithNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// ReplaceWithNodes replaces each element in the set of matched elements with
|
||||
// the given nodes.
|
||||
// It returns the removed elements.
|
||||
//
|
||||
// This follows the same rules as Selection.Append.
|
||||
func (s *Selection) ReplaceWithNodes(ns ...*html.Node) *Selection {
|
||||
s.AfterNodes(ns...)
|
||||
return s.Remove()
|
||||
}
|
||||
|
||||
// SetHtml sets the html content of each element in the selection to
|
||||
// specified html string.
|
||||
func (s *Selection) SetHtml(html string) *Selection {
|
||||
return setHtmlNodes(s, parseHtml(html)...)
|
||||
}
|
||||
|
||||
// SetText sets the content of each element in the selection to specified content.
|
||||
// The provided text string is escaped.
|
||||
func (s *Selection) SetText(text string) *Selection {
|
||||
return s.SetHtml(html.EscapeString(text))
|
||||
}
|
||||
|
||||
// Unwrap removes the parents of the set of matched elements, leaving the matched
|
||||
// elements (and their siblings, if any) in their place.
|
||||
// It returns the original selection.
|
||||
func (s *Selection) Unwrap() *Selection {
|
||||
s.Parent().Each(func(i int, ss *Selection) {
|
||||
// For some reason, jquery allows unwrap to remove the <head> element, so
|
||||
// allowing it here too. Same for <html>. Why it allows those elements to
|
||||
// be unwrapped while not allowing body is a mystery to me.
|
||||
if ss.Nodes[0].Data != "body" {
|
||||
ss.ReplaceWithSelection(ss.Contents())
|
||||
}
|
||||
})
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Wrap wraps each element in the set of matched elements inside the first
|
||||
// element matched by the given selector. The matched child is cloned before
|
||||
// being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) Wrap(selector string) *Selection {
|
||||
return s.WrapMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// WrapMatcher wraps each element in the set of matched elements inside the
|
||||
// first element matched by the given matcher. The matched child is cloned
|
||||
// before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapMatcher(m Matcher) *Selection {
|
||||
return s.wrapNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// WrapSelection wraps each element in the set of matched elements inside the
|
||||
// first element in the given Selection. The element is cloned before being
|
||||
// inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapSelection(sel *Selection) *Selection {
|
||||
return s.wrapNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// WrapHtml wraps each element in the set of matched elements inside the inner-
|
||||
// most child of the given HTML.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapHtml(html string) *Selection {
|
||||
return s.wrapNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// WrapNode wraps each element in the set of matched elements inside the inner-
|
||||
// most child of the given node. The given node is copied before being inserted
|
||||
// into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapNode(n *html.Node) *Selection {
|
||||
return s.wrapNodes(n)
|
||||
}
|
||||
|
||||
func (s *Selection) wrapNodes(ns ...*html.Node) *Selection {
|
||||
s.Each(func(i int, ss *Selection) {
|
||||
ss.wrapAllNodes(ns...)
|
||||
})
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// WrapAll wraps a single HTML structure, matched by the given selector, around
|
||||
// all elements in the set of matched elements. The matched child is cloned
|
||||
// before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapAll(selector string) *Selection {
|
||||
return s.WrapAllMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// WrapAllMatcher wraps a single HTML structure, matched by the given Matcher,
|
||||
// around all elements in the set of matched elements. The matched child is
|
||||
// cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapAllMatcher(m Matcher) *Selection {
|
||||
return s.wrapAllNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// WrapAllSelection wraps a single HTML structure, the first node of the given
|
||||
// Selection, around all elements in the set of matched elements. The matched
|
||||
// child is cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapAllSelection(sel *Selection) *Selection {
|
||||
return s.wrapAllNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// WrapAllHtml wraps the given HTML structure around all elements in the set of
|
||||
// matched elements. The matched child is cloned before being inserted into the
|
||||
// document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapAllHtml(html string) *Selection {
|
||||
return s.wrapAllNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
func (s *Selection) wrapAllNodes(ns ...*html.Node) *Selection {
|
||||
if len(ns) > 0 {
|
||||
return s.WrapAllNode(ns[0])
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// WrapAllNode wraps the given node around the first element in the Selection,
|
||||
// making all other nodes in the Selection children of the given node. The node
|
||||
// is cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapAllNode(n *html.Node) *Selection {
|
||||
if s.Size() == 0 {
|
||||
return s
|
||||
}
|
||||
|
||||
wrap := cloneNode(n)
|
||||
|
||||
first := s.Nodes[0]
|
||||
if first.Parent != nil {
|
||||
first.Parent.InsertBefore(wrap, first)
|
||||
first.Parent.RemoveChild(first)
|
||||
}
|
||||
|
||||
for c := getFirstChildEl(wrap); c != nil; c = getFirstChildEl(wrap) {
|
||||
wrap = c
|
||||
}
|
||||
|
||||
newSingleSelection(wrap, s.document).AppendSelection(s)
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// WrapInner wraps an HTML structure, matched by the given selector, around the
|
||||
// content of element in the set of matched elements. The matched child is
|
||||
// cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapInner(selector string) *Selection {
|
||||
return s.WrapInnerMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// WrapInnerMatcher wraps an HTML structure, matched by the given selector,
|
||||
// around the content of element in the set of matched elements. The matched
|
||||
// child is cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapInnerMatcher(m Matcher) *Selection {
|
||||
return s.wrapInnerNodes(m.MatchAll(s.document.rootNode)...)
|
||||
}
|
||||
|
||||
// WrapInnerSelection wraps an HTML structure, matched by the given selector,
|
||||
// around the content of element in the set of matched elements. The matched
|
||||
// child is cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapInnerSelection(sel *Selection) *Selection {
|
||||
return s.wrapInnerNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// WrapInnerHtml wraps an HTML structure, matched by the given selector, around
|
||||
// the content of element in the set of matched elements. The matched child is
|
||||
// cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapInnerHtml(html string) *Selection {
|
||||
return s.wrapInnerNodes(parseHtml(html)...)
|
||||
}
|
||||
|
||||
// WrapInnerNode wraps an HTML structure, matched by the given selector, around
|
||||
// the content of element in the set of matched elements. The matched child is
|
||||
// cloned before being inserted into the document.
|
||||
//
|
||||
// It returns the original set of elements.
|
||||
func (s *Selection) WrapInnerNode(n *html.Node) *Selection {
|
||||
return s.wrapInnerNodes(n)
|
||||
}
|
||||
|
||||
func (s *Selection) wrapInnerNodes(ns ...*html.Node) *Selection {
|
||||
if len(ns) == 0 {
|
||||
return s
|
||||
}
|
||||
|
||||
s.Each(func(i int, s *Selection) {
|
||||
contents := s.Contents()
|
||||
|
||||
if contents.Size() > 0 {
|
||||
contents.wrapAllNodes(ns...)
|
||||
} else {
|
||||
s.AppendNodes(cloneNode(ns[0]))
|
||||
}
|
||||
})
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func parseHtml(h string) []*html.Node {
|
||||
// Errors are only returned when the io.Reader returns any error besides
|
||||
// EOF, but strings.Reader never will
|
||||
nodes, err := html.ParseFragment(strings.NewReader(h), &html.Node{Type: html.ElementNode})
|
||||
if err != nil {
|
||||
panic("goquery: failed to parse HTML: " + err.Error())
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
||||
func setHtmlNodes(s *Selection, ns ...*html.Node) *Selection {
|
||||
for _, n := range s.Nodes {
|
||||
for c := n.FirstChild; c != nil; c = n.FirstChild {
|
||||
n.RemoveChild(c)
|
||||
}
|
||||
for _, c := range ns {
|
||||
n.AppendChild(cloneNode(c))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Get the first child that is an ElementNode
|
||||
func getFirstChildEl(n *html.Node) *html.Node {
|
||||
c := n.FirstChild
|
||||
for c != nil && c.Type != html.ElementNode {
|
||||
c = c.NextSibling
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// Deep copy a slice of nodes.
|
||||
func cloneNodes(ns []*html.Node) []*html.Node {
|
||||
cns := make([]*html.Node, 0, len(ns))
|
||||
|
||||
for _, n := range ns {
|
||||
cns = append(cns, cloneNode(n))
|
||||
}
|
||||
|
||||
return cns
|
||||
}
|
||||
|
||||
// Deep copy a node. The new node has clones of all the original node's
|
||||
// children but none of its parents or siblings.
|
||||
func cloneNode(n *html.Node) *html.Node {
|
||||
nn := &html.Node{
|
||||
Type: n.Type,
|
||||
DataAtom: n.DataAtom,
|
||||
Data: n.Data,
|
||||
Attr: make([]html.Attribute, len(n.Attr)),
|
||||
}
|
||||
|
||||
copy(nn.Attr, n.Attr)
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
nn.AppendChild(cloneNode(c))
|
||||
}
|
||||
|
||||
return nn
|
||||
}
|
||||
|
||||
func (s *Selection) manipulateNodes(ns []*html.Node, reverse bool,
|
||||
f func(sn *html.Node, n *html.Node)) *Selection {
|
||||
|
||||
lasti := s.Size() - 1
|
||||
|
||||
// net.Html doesn't provide document fragments for insertion, so to get
|
||||
// things in the correct order with After() and Prepend(), the callback
|
||||
// needs to be called on the reverse of the nodes.
|
||||
if reverse {
|
||||
for i, j := 0, len(ns)-1; i < j; i, j = i+1, j-1 {
|
||||
ns[i], ns[j] = ns[j], ns[i]
|
||||
}
|
||||
}
|
||||
|
||||
for i, sn := range s.Nodes {
|
||||
for _, n := range ns {
|
||||
if i != lasti {
|
||||
f(sn, cloneNode(n))
|
||||
} else {
|
||||
if n.Parent != nil {
|
||||
n.Parent.RemoveChild(n)
|
||||
}
|
||||
f(sn, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
|
@ -0,0 +1,275 @@
|
|||
package goquery
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
var rxClassTrim = regexp.MustCompile("[\t\r\n]")
|
||||
|
||||
// Attr gets the specified attribute's value for the first element in the
|
||||
// Selection. To get the value for each element individually, use a looping
|
||||
// construct such as Each or Map method.
|
||||
func (s *Selection) Attr(attrName string) (val string, exists bool) {
|
||||
if len(s.Nodes) == 0 {
|
||||
return
|
||||
}
|
||||
return getAttributeValue(attrName, s.Nodes[0])
|
||||
}
|
||||
|
||||
// AttrOr works like Attr but returns default value if attribute is not present.
|
||||
func (s *Selection) AttrOr(attrName, defaultValue string) string {
|
||||
if len(s.Nodes) == 0 {
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
val, exists := getAttributeValue(attrName, s.Nodes[0])
|
||||
if !exists {
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
// RemoveAttr removes the named attribute from each element in the set of matched elements.
|
||||
func (s *Selection) RemoveAttr(attrName string) *Selection {
|
||||
for _, n := range s.Nodes {
|
||||
removeAttr(n, attrName)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// SetAttr sets the given attribute on each element in the set of matched elements.
|
||||
func (s *Selection) SetAttr(attrName, val string) *Selection {
|
||||
for _, n := range s.Nodes {
|
||||
attr := getAttributePtr(attrName, n)
|
||||
if attr == nil {
|
||||
n.Attr = append(n.Attr, html.Attribute{Key: attrName, Val: val})
|
||||
} else {
|
||||
attr.Val = val
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Text gets the combined text contents of each element in the set of matched
|
||||
// elements, including their descendants.
|
||||
func (s *Selection) Text() string {
|
||||
var buf bytes.Buffer
|
||||
|
||||
// Slightly optimized vs calling Each: no single selection object created
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.TextNode {
|
||||
// Keep newlines and spaces, like jQuery
|
||||
buf.WriteString(n.Data)
|
||||
}
|
||||
if n.FirstChild != nil {
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, n := range s.Nodes {
|
||||
f(n)
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// Size is an alias for Length.
|
||||
func (s *Selection) Size() int {
|
||||
return s.Length()
|
||||
}
|
||||
|
||||
// Length returns the number of elements in the Selection object.
|
||||
func (s *Selection) Length() int {
|
||||
return len(s.Nodes)
|
||||
}
|
||||
|
||||
// Html gets the HTML contents of the first element in the set of matched
|
||||
// elements. It includes text and comment nodes.
|
||||
func (s *Selection) Html() (ret string, e error) {
|
||||
// Since there is no .innerHtml, the HTML content must be re-created from
|
||||
// the nodes using html.Render.
|
||||
var buf bytes.Buffer
|
||||
|
||||
if len(s.Nodes) > 0 {
|
||||
for c := s.Nodes[0].FirstChild; c != nil; c = c.NextSibling {
|
||||
e = html.Render(&buf, c)
|
||||
if e != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
ret = buf.String()
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// AddClass adds the given class(es) to each element in the set of matched elements.
|
||||
// Multiple class names can be specified, separated by a space or via multiple arguments.
|
||||
func (s *Selection) AddClass(class ...string) *Selection {
|
||||
classStr := strings.TrimSpace(strings.Join(class, " "))
|
||||
|
||||
if classStr == "" {
|
||||
return s
|
||||
}
|
||||
|
||||
tcls := getClassesSlice(classStr)
|
||||
for _, n := range s.Nodes {
|
||||
curClasses, attr := getClassesAndAttr(n, true)
|
||||
for _, newClass := range tcls {
|
||||
if !strings.Contains(curClasses, " "+newClass+" ") {
|
||||
curClasses += newClass + " "
|
||||
}
|
||||
}
|
||||
|
||||
setClasses(n, attr, curClasses)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// HasClass determines whether any of the matched elements are assigned the
|
||||
// given class.
|
||||
func (s *Selection) HasClass(class string) bool {
|
||||
class = " " + class + " "
|
||||
for _, n := range s.Nodes {
|
||||
classes, _ := getClassesAndAttr(n, false)
|
||||
if strings.Contains(classes, class) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// RemoveClass removes the given class(es) from each element in the set of matched elements.
|
||||
// Multiple class names can be specified, separated by a space or via multiple arguments.
|
||||
// If no class name is provided, all classes are removed.
|
||||
func (s *Selection) RemoveClass(class ...string) *Selection {
|
||||
var rclasses []string
|
||||
|
||||
classStr := strings.TrimSpace(strings.Join(class, " "))
|
||||
remove := classStr == ""
|
||||
|
||||
if !remove {
|
||||
rclasses = getClassesSlice(classStr)
|
||||
}
|
||||
|
||||
for _, n := range s.Nodes {
|
||||
if remove {
|
||||
removeAttr(n, "class")
|
||||
} else {
|
||||
classes, attr := getClassesAndAttr(n, true)
|
||||
for _, rcl := range rclasses {
|
||||
classes = strings.Replace(classes, " "+rcl+" ", " ", -1)
|
||||
}
|
||||
|
||||
setClasses(n, attr, classes)
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// ToggleClass adds or removes the given class(es) for each element in the set of matched elements.
|
||||
// Multiple class names can be specified, separated by a space or via multiple arguments.
|
||||
func (s *Selection) ToggleClass(class ...string) *Selection {
|
||||
classStr := strings.TrimSpace(strings.Join(class, " "))
|
||||
|
||||
if classStr == "" {
|
||||
return s
|
||||
}
|
||||
|
||||
tcls := getClassesSlice(classStr)
|
||||
|
||||
for _, n := range s.Nodes {
|
||||
classes, attr := getClassesAndAttr(n, true)
|
||||
for _, tcl := range tcls {
|
||||
if strings.Contains(classes, " "+tcl+" ") {
|
||||
classes = strings.Replace(classes, " "+tcl+" ", " ", -1)
|
||||
} else {
|
||||
classes += tcl + " "
|
||||
}
|
||||
}
|
||||
|
||||
setClasses(n, attr, classes)
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
func getAttributePtr(attrName string, n *html.Node) *html.Attribute {
|
||||
if n == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i, a := range n.Attr {
|
||||
if a.Key == attrName {
|
||||
return &n.Attr[i]
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Private function to get the specified attribute's value from a node.
|
||||
func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) {
|
||||
if a := getAttributePtr(attrName, n); a != nil {
|
||||
val = a.Val
|
||||
exists = true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Get and normalize the "class" attribute from the node.
|
||||
func getClassesAndAttr(n *html.Node, create bool) (classes string, attr *html.Attribute) {
|
||||
// Applies only to element nodes
|
||||
if n.Type == html.ElementNode {
|
||||
attr = getAttributePtr("class", n)
|
||||
if attr == nil && create {
|
||||
n.Attr = append(n.Attr, html.Attribute{
|
||||
Key: "class",
|
||||
Val: "",
|
||||
})
|
||||
attr = &n.Attr[len(n.Attr)-1]
|
||||
}
|
||||
}
|
||||
|
||||
if attr == nil {
|
||||
classes = " "
|
||||
} else {
|
||||
classes = rxClassTrim.ReplaceAllString(" "+attr.Val+" ", " ")
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func getClassesSlice(classes string) []string {
|
||||
return strings.Split(rxClassTrim.ReplaceAllString(" "+classes+" ", " "), " ")
|
||||
}
|
||||
|
||||
func removeAttr(n *html.Node, attrName string) {
|
||||
for i, a := range n.Attr {
|
||||
if a.Key == attrName {
|
||||
n.Attr[i], n.Attr[len(n.Attr)-1], n.Attr =
|
||||
n.Attr[len(n.Attr)-1], html.Attribute{}, n.Attr[:len(n.Attr)-1]
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setClasses(n *html.Node, attr *html.Attribute, classes string) {
|
||||
classes = strings.TrimSpace(classes)
|
||||
if classes == "" {
|
||||
removeAttr(n, "class")
|
||||
return
|
||||
}
|
||||
|
||||
attr.Val = classes
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package goquery
|
||||
|
||||
import "golang.org/x/net/html"
|
||||
|
||||
// Is checks the current matched set of elements against a selector and
|
||||
// returns true if at least one of these elements matches.
|
||||
func (s *Selection) Is(selector string) bool {
|
||||
return s.IsMatcher(compileMatcher(selector))
|
||||
}
|
||||
|
||||
// IsMatcher checks the current matched set of elements against a matcher and
|
||||
// returns true if at least one of these elements matches.
|
||||
func (s *Selection) IsMatcher(m Matcher) bool {
|
||||
if len(s.Nodes) > 0 {
|
||||
if len(s.Nodes) == 1 {
|
||||
return m.Match(s.Nodes[0])
|
||||
}
|
||||
return len(m.Filter(s.Nodes)) > 0
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// IsFunction checks the current matched set of elements against a predicate and
|
||||
// returns true if at least one of these elements matches.
|
||||
func (s *Selection) IsFunction(f func(int, *Selection) bool) bool {
|
||||
return s.FilterFunction(f).Length() > 0
|
||||
}
|
||||
|
||||
// IsSelection checks the current matched set of elements against a Selection object
|
||||
// and returns true if at least one of these elements matches.
|
||||
func (s *Selection) IsSelection(sel *Selection) bool {
|
||||
return s.FilterSelection(sel).Length() > 0
|
||||
}
|
||||
|
||||
// IsNodes checks the current matched set of elements against the specified nodes
|
||||
// and returns true if at least one of these elements matches.
|
||||
func (s *Selection) IsNodes(nodes ...*html.Node) bool {
|
||||
return s.FilterNodes(nodes...).Length() > 0
|
||||
}
|
||||
|
||||
// Contains returns true if the specified Node is within,
|
||||
// at any depth, one of the nodes in the Selection object.
|
||||
// It is NOT inclusive, to behave like jQuery's implementation, and
|
||||
// unlike Javascript's .contains, so if the contained
|
||||
// node is itself in the selection, it returns false.
|
||||
func (s *Selection) Contains(n *html.Node) bool {
|
||||
return sliceContains(s.Nodes, n)
|
||||
}
|
|
@ -0,0 +1,698 @@
|
|||
package goquery
|
||||
|
||||
import "golang.org/x/net/html"
|
||||
|
||||
type siblingType int
|
||||
|
||||
// Sibling type, used internally when iterating over children at the same
|
||||
// level (siblings) to specify which nodes are requested.
|
||||
const (
|
||||
siblingPrevUntil siblingType = iota - 3
|
||||
siblingPrevAll
|
||||
siblingPrev
|
||||
siblingAll
|
||||
siblingNext
|
||||
siblingNextAll
|
||||
siblingNextUntil
|
||||
siblingAllIncludingNonElements
|
||||
)
|
||||
|
||||
// Find gets the descendants of each element in the current set of matched
|
||||
// elements, filtered by a selector. It returns a new Selection object
|
||||
// containing these matched elements.
|
||||
func (s *Selection) Find(selector string) *Selection {
|
||||
return pushStack(s, findWithMatcher(s.Nodes, compileMatcher(selector)))
|
||||
}
|
||||
|
||||
// FindMatcher gets the descendants of each element in the current set of matched
|
||||
// elements, filtered by the matcher. It returns a new Selection object
|
||||
// containing these matched elements.
|
||||
func (s *Selection) FindMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, findWithMatcher(s.Nodes, m))
|
||||
}
|
||||
|
||||
// FindSelection gets the descendants of each element in the current
|
||||
// Selection, filtered by a Selection. It returns a new Selection object
|
||||
// containing these matched elements.
|
||||
func (s *Selection) FindSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return pushStack(s, nil)
|
||||
}
|
||||
return s.FindNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// FindNodes gets the descendants of each element in the current
|
||||
// Selection, filtered by some nodes. It returns a new Selection object
|
||||
// containing these matched elements.
|
||||
func (s *Selection) FindNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
|
||||
if sliceContains(s.Nodes, n) {
|
||||
return []*html.Node{n}
|
||||
}
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
||||
// Contents gets the children of each element in the Selection,
|
||||
// including text and comment nodes. It returns a new Selection object
|
||||
// containing these elements.
|
||||
func (s *Selection) Contents() *Selection {
|
||||
return pushStack(s, getChildrenNodes(s.Nodes, siblingAllIncludingNonElements))
|
||||
}
|
||||
|
||||
// ContentsFiltered gets the children of each element in the Selection,
|
||||
// filtered by the specified selector. It returns a new Selection
|
||||
// object containing these elements. Since selectors only act on Element nodes,
|
||||
// this function is an alias to ChildrenFiltered unless the selector is empty,
|
||||
// in which case it is an alias to Contents.
|
||||
func (s *Selection) ContentsFiltered(selector string) *Selection {
|
||||
if selector != "" {
|
||||
return s.ChildrenFiltered(selector)
|
||||
}
|
||||
return s.Contents()
|
||||
}
|
||||
|
||||
// ContentsMatcher gets the children of each element in the Selection,
|
||||
// filtered by the specified matcher. It returns a new Selection
|
||||
// object containing these elements. Since matchers only act on Element nodes,
|
||||
// this function is an alias to ChildrenMatcher.
|
||||
func (s *Selection) ContentsMatcher(m Matcher) *Selection {
|
||||
return s.ChildrenMatcher(m)
|
||||
}
|
||||
|
||||
// Children gets the child elements of each element in the Selection.
|
||||
// It returns a new Selection object containing these elements.
|
||||
func (s *Selection) Children() *Selection {
|
||||
return pushStack(s, getChildrenNodes(s.Nodes, siblingAll))
|
||||
}
|
||||
|
||||
// ChildrenFiltered gets the child elements of each element in the Selection,
|
||||
// filtered by the specified selector. It returns a new
|
||||
// Selection object containing these elements.
|
||||
func (s *Selection) ChildrenFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// ChildrenMatcher gets the child elements of each element in the Selection,
|
||||
// filtered by the specified matcher. It returns a new
|
||||
// Selection object containing these elements.
|
||||
func (s *Selection) ChildrenMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getChildrenNodes(s.Nodes, siblingAll), m)
|
||||
}
|
||||
|
||||
// Parent gets the parent of each element in the Selection. It returns a
|
||||
// new Selection object containing the matched elements.
|
||||
func (s *Selection) Parent() *Selection {
|
||||
return pushStack(s, getParentNodes(s.Nodes))
|
||||
}
|
||||
|
||||
// ParentFiltered gets the parent of each element in the Selection filtered by a
|
||||
// selector. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) ParentFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getParentNodes(s.Nodes), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// ParentMatcher gets the parent of each element in the Selection filtered by a
|
||||
// matcher. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) ParentMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getParentNodes(s.Nodes), m)
|
||||
}
|
||||
|
||||
// Closest gets the first element that matches the selector by testing the
|
||||
// element itself and traversing up through its ancestors in the DOM tree.
|
||||
func (s *Selection) Closest(selector string) *Selection {
|
||||
cs := compileMatcher(selector)
|
||||
return s.ClosestMatcher(cs)
|
||||
}
|
||||
|
||||
// ClosestMatcher gets the first element that matches the matcher by testing the
|
||||
// element itself and traversing up through its ancestors in the DOM tree.
|
||||
func (s *Selection) ClosestMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
|
||||
// For each node in the selection, test the node itself, then each parent
|
||||
// until a match is found.
|
||||
for ; n != nil; n = n.Parent {
|
||||
if m.Match(n) {
|
||||
return []*html.Node{n}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
||||
// ClosestNodes gets the first element that matches one of the nodes by testing the
|
||||
// element itself and traversing up through its ancestors in the DOM tree.
|
||||
func (s *Selection) ClosestNodes(nodes ...*html.Node) *Selection {
|
||||
set := make(map[*html.Node]bool)
|
||||
for _, n := range nodes {
|
||||
set[n] = true
|
||||
}
|
||||
return pushStack(s, mapNodes(s.Nodes, func(i int, n *html.Node) []*html.Node {
|
||||
// For each node in the selection, test the node itself, then each parent
|
||||
// until a match is found.
|
||||
for ; n != nil; n = n.Parent {
|
||||
if set[n] {
|
||||
return []*html.Node{n}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
||||
// ClosestSelection gets the first element that matches one of the nodes in the
|
||||
// Selection by testing the element itself and traversing up through its ancestors
|
||||
// in the DOM tree.
|
||||
func (s *Selection) ClosestSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return pushStack(s, nil)
|
||||
}
|
||||
return s.ClosestNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// Parents gets the ancestors of each element in the current Selection. It
|
||||
// returns a new Selection object with the matched elements.
|
||||
func (s *Selection) Parents() *Selection {
|
||||
return pushStack(s, getParentsNodes(s.Nodes, nil, nil))
|
||||
}
|
||||
|
||||
// ParentsFiltered gets the ancestors of each element in the current
|
||||
// Selection. It returns a new Selection object with the matched elements.
|
||||
func (s *Selection) ParentsFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// ParentsMatcher gets the ancestors of each element in the current
|
||||
// Selection. It returns a new Selection object with the matched elements.
|
||||
func (s *Selection) ParentsMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getParentsNodes(s.Nodes, nil, nil), m)
|
||||
}
|
||||
|
||||
// ParentsUntil gets the ancestors of each element in the Selection, up to but
|
||||
// not including the element matched by the selector. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) ParentsUntil(selector string) *Selection {
|
||||
return pushStack(s, getParentsNodes(s.Nodes, compileMatcher(selector), nil))
|
||||
}
|
||||
|
||||
// ParentsUntilMatcher gets the ancestors of each element in the Selection, up to but
|
||||
// not including the element matched by the matcher. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) ParentsUntilMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, getParentsNodes(s.Nodes, m, nil))
|
||||
}
|
||||
|
||||
// ParentsUntilSelection gets the ancestors of each element in the Selection,
|
||||
// up to but not including the elements in the specified Selection. It returns a
|
||||
// new Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsUntilSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.Parents()
|
||||
}
|
||||
return s.ParentsUntilNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// ParentsUntilNodes gets the ancestors of each element in the Selection,
|
||||
// up to but not including the specified nodes. It returns a
|
||||
// new Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsUntilNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, getParentsNodes(s.Nodes, nil, nodes))
|
||||
}
|
||||
|
||||
// ParentsFilteredUntil is like ParentsUntil, with the option to filter the
|
||||
// results based on a selector string. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) ParentsFilteredUntil(filterSelector, untilSelector string) *Selection {
|
||||
return filterAndPush(s, getParentsNodes(s.Nodes, compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
|
||||
}
|
||||
|
||||
// ParentsFilteredUntilMatcher is like ParentsUntilMatcher, with the option to filter the
|
||||
// results based on a matcher. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsFilteredUntilMatcher(filter, until Matcher) *Selection {
|
||||
return filterAndPush(s, getParentsNodes(s.Nodes, until, nil), filter)
|
||||
}
|
||||
|
||||
// ParentsFilteredUntilSelection is like ParentsUntilSelection, with the
|
||||
// option to filter the results based on a selector string. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
|
||||
return s.ParentsMatcherUntilSelection(compileMatcher(filterSelector), sel)
|
||||
}
|
||||
|
||||
// ParentsMatcherUntilSelection is like ParentsUntilSelection, with the
|
||||
// option to filter the results based on a matcher. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.ParentsMatcher(filter)
|
||||
}
|
||||
return s.ParentsMatcherUntilNodes(filter, sel.Nodes...)
|
||||
}
|
||||
|
||||
// ParentsFilteredUntilNodes is like ParentsUntilNodes, with the
|
||||
// option to filter the results based on a selector string. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
|
||||
return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), compileMatcher(filterSelector))
|
||||
}
|
||||
|
||||
// ParentsMatcherUntilNodes is like ParentsUntilNodes, with the
|
||||
// option to filter the results based on a matcher. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) ParentsMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
|
||||
return filterAndPush(s, getParentsNodes(s.Nodes, nil, nodes), filter)
|
||||
}
|
||||
|
||||
// Siblings gets the siblings of each element in the Selection. It returns
|
||||
// a new Selection object containing the matched elements.
|
||||
func (s *Selection) Siblings() *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil))
|
||||
}
|
||||
|
||||
// SiblingsFiltered gets the siblings of each element in the Selection
|
||||
// filtered by a selector. It returns a new Selection object containing the
|
||||
// matched elements.
|
||||
func (s *Selection) SiblingsFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// SiblingsMatcher gets the siblings of each element in the Selection
|
||||
// filtered by a matcher. It returns a new Selection object containing the
|
||||
// matched elements.
|
||||
func (s *Selection) SiblingsMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingAll, nil, nil), m)
|
||||
}
|
||||
|
||||
// Next gets the immediately following sibling of each element in the
|
||||
// Selection. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) Next() *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil))
|
||||
}
|
||||
|
||||
// NextFiltered gets the immediately following sibling of each element in the
|
||||
// Selection filtered by a selector. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) NextFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// NextMatcher gets the immediately following sibling of each element in the
|
||||
// Selection filtered by a matcher. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) NextMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNext, nil, nil), m)
|
||||
}
|
||||
|
||||
// NextAll gets all the following siblings of each element in the
|
||||
// Selection. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) NextAll() *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil))
|
||||
}
|
||||
|
||||
// NextAllFiltered gets all the following siblings of each element in the
|
||||
// Selection filtered by a selector. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) NextAllFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// NextAllMatcher gets all the following siblings of each element in the
|
||||
// Selection filtered by a matcher. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) NextAllMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextAll, nil, nil), m)
|
||||
}
|
||||
|
||||
// Prev gets the immediately preceding sibling of each element in the
|
||||
// Selection. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) Prev() *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil))
|
||||
}
|
||||
|
||||
// PrevFiltered gets the immediately preceding sibling of each element in the
|
||||
// Selection filtered by a selector. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) PrevFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// PrevMatcher gets the immediately preceding sibling of each element in the
|
||||
// Selection filtered by a matcher. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) PrevMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrev, nil, nil), m)
|
||||
}
|
||||
|
||||
// PrevAll gets all the preceding siblings of each element in the
|
||||
// Selection. It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) PrevAll() *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil))
|
||||
}
|
||||
|
||||
// PrevAllFiltered gets all the preceding siblings of each element in the
|
||||
// Selection filtered by a selector. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) PrevAllFiltered(selector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil), compileMatcher(selector))
|
||||
}
|
||||
|
||||
// PrevAllMatcher gets all the preceding siblings of each element in the
|
||||
// Selection filtered by a matcher. It returns a new Selection object
|
||||
// containing the matched elements.
|
||||
func (s *Selection) PrevAllMatcher(m Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevAll, nil, nil), m)
|
||||
}
|
||||
|
||||
// NextUntil gets all following siblings of each element up to but not
|
||||
// including the element matched by the selector. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) NextUntil(selector string) *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
compileMatcher(selector), nil))
|
||||
}
|
||||
|
||||
// NextUntilMatcher gets all following siblings of each element up to but not
|
||||
// including the element matched by the matcher. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) NextUntilMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
m, nil))
|
||||
}
|
||||
|
||||
// NextUntilSelection gets all following siblings of each element up to but not
|
||||
// including the element matched by the Selection. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) NextUntilSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.NextAll()
|
||||
}
|
||||
return s.NextUntilNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// NextUntilNodes gets all following siblings of each element up to but not
|
||||
// including the element matched by the nodes. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) NextUntilNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
nil, nodes))
|
||||
}
|
||||
|
||||
// PrevUntil gets all preceding siblings of each element up to but not
|
||||
// including the element matched by the selector. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) PrevUntil(selector string) *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
compileMatcher(selector), nil))
|
||||
}
|
||||
|
||||
// PrevUntilMatcher gets all preceding siblings of each element up to but not
|
||||
// including the element matched by the matcher. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) PrevUntilMatcher(m Matcher) *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
m, nil))
|
||||
}
|
||||
|
||||
// PrevUntilSelection gets all preceding siblings of each element up to but not
|
||||
// including the element matched by the Selection. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) PrevUntilSelection(sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.PrevAll()
|
||||
}
|
||||
return s.PrevUntilNodes(sel.Nodes...)
|
||||
}
|
||||
|
||||
// PrevUntilNodes gets all preceding siblings of each element up to but not
|
||||
// including the element matched by the nodes. It returns a new Selection
|
||||
// object containing the matched elements.
|
||||
func (s *Selection) PrevUntilNodes(nodes ...*html.Node) *Selection {
|
||||
return pushStack(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
nil, nodes))
|
||||
}
|
||||
|
||||
// NextFilteredUntil is like NextUntil, with the option to filter
|
||||
// the results based on a selector string.
|
||||
// It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) NextFilteredUntil(filterSelector, untilSelector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
|
||||
}
|
||||
|
||||
// NextFilteredUntilMatcher is like NextUntilMatcher, with the option to filter
|
||||
// the results based on a matcher.
|
||||
// It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) NextFilteredUntilMatcher(filter, until Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
until, nil), filter)
|
||||
}
|
||||
|
||||
// NextFilteredUntilSelection is like NextUntilSelection, with the
|
||||
// option to filter the results based on a selector string. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) NextFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
|
||||
return s.NextMatcherUntilSelection(compileMatcher(filterSelector), sel)
|
||||
}
|
||||
|
||||
// NextMatcherUntilSelection is like NextUntilSelection, with the
|
||||
// option to filter the results based on a matcher. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) NextMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.NextMatcher(filter)
|
||||
}
|
||||
return s.NextMatcherUntilNodes(filter, sel.Nodes...)
|
||||
}
|
||||
|
||||
// NextFilteredUntilNodes is like NextUntilNodes, with the
|
||||
// option to filter the results based on a selector string. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) NextFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
nil, nodes), compileMatcher(filterSelector))
|
||||
}
|
||||
|
||||
// NextMatcherUntilNodes is like NextUntilNodes, with the
|
||||
// option to filter the results based on a matcher. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) NextMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingNextUntil,
|
||||
nil, nodes), filter)
|
||||
}
|
||||
|
||||
// PrevFilteredUntil is like PrevUntil, with the option to filter
|
||||
// the results based on a selector string.
|
||||
// It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) PrevFilteredUntil(filterSelector, untilSelector string) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
compileMatcher(untilSelector), nil), compileMatcher(filterSelector))
|
||||
}
|
||||
|
||||
// PrevFilteredUntilMatcher is like PrevUntilMatcher, with the option to filter
|
||||
// the results based on a matcher.
|
||||
// It returns a new Selection object containing the matched elements.
|
||||
func (s *Selection) PrevFilteredUntilMatcher(filter, until Matcher) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
until, nil), filter)
|
||||
}
|
||||
|
||||
// PrevFilteredUntilSelection is like PrevUntilSelection, with the
|
||||
// option to filter the results based on a selector string. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) PrevFilteredUntilSelection(filterSelector string, sel *Selection) *Selection {
|
||||
return s.PrevMatcherUntilSelection(compileMatcher(filterSelector), sel)
|
||||
}
|
||||
|
||||
// PrevMatcherUntilSelection is like PrevUntilSelection, with the
|
||||
// option to filter the results based on a matcher. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) PrevMatcherUntilSelection(filter Matcher, sel *Selection) *Selection {
|
||||
if sel == nil {
|
||||
return s.PrevMatcher(filter)
|
||||
}
|
||||
return s.PrevMatcherUntilNodes(filter, sel.Nodes...)
|
||||
}
|
||||
|
||||
// PrevFilteredUntilNodes is like PrevUntilNodes, with the
|
||||
// option to filter the results based on a selector string. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) PrevFilteredUntilNodes(filterSelector string, nodes ...*html.Node) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
nil, nodes), compileMatcher(filterSelector))
|
||||
}
|
||||
|
||||
// PrevMatcherUntilNodes is like PrevUntilNodes, with the
|
||||
// option to filter the results based on a matcher. It returns a new
|
||||
// Selection object containing the matched elements.
|
||||
func (s *Selection) PrevMatcherUntilNodes(filter Matcher, nodes ...*html.Node) *Selection {
|
||||
return filterAndPush(s, getSiblingNodes(s.Nodes, siblingPrevUntil,
|
||||
nil, nodes), filter)
|
||||
}
|
||||
|
||||
// Filter and push filters the nodes based on a matcher, and pushes the results
|
||||
// on the stack, with the srcSel as previous selection.
|
||||
func filterAndPush(srcSel *Selection, nodes []*html.Node, m Matcher) *Selection {
|
||||
// Create a temporary Selection with the specified nodes to filter using winnow
|
||||
sel := &Selection{nodes, srcSel.document, nil}
|
||||
// Filter based on matcher and push on stack
|
||||
return pushStack(srcSel, winnow(sel, m, true))
|
||||
}
|
||||
|
||||
// Internal implementation of Find that return raw nodes.
|
||||
func findWithMatcher(nodes []*html.Node, m Matcher) []*html.Node {
|
||||
// Map nodes to find the matches within the children of each node
|
||||
return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) {
|
||||
// Go down one level, becausejQuery's Find selects only within descendants
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type == html.ElementNode {
|
||||
result = append(result, m.MatchAll(c)...)
|
||||
}
|
||||
}
|
||||
return
|
||||
})
|
||||
}
|
||||
|
||||
// Internal implementation to get all parent nodes, stopping at the specified
|
||||
// node (or nil if no stop).
|
||||
func getParentsNodes(nodes []*html.Node, stopm Matcher, stopNodes []*html.Node) []*html.Node {
|
||||
return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) {
|
||||
for p := n.Parent; p != nil; p = p.Parent {
|
||||
sel := newSingleSelection(p, nil)
|
||||
if stopm != nil {
|
||||
if sel.IsMatcher(stopm) {
|
||||
break
|
||||
}
|
||||
} else if len(stopNodes) > 0 {
|
||||
if sel.IsNodes(stopNodes...) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if p.Type == html.ElementNode {
|
||||
result = append(result, p)
|
||||
}
|
||||
}
|
||||
return
|
||||
})
|
||||
}
|
||||
|
||||
// Internal implementation of sibling nodes that return a raw slice of matches.
|
||||
func getSiblingNodes(nodes []*html.Node, st siblingType, untilm Matcher, untilNodes []*html.Node) []*html.Node {
|
||||
var f func(*html.Node) bool
|
||||
|
||||
// If the requested siblings are ...Until, create the test function to
|
||||
// determine if the until condition is reached (returns true if it is)
|
||||
if st == siblingNextUntil || st == siblingPrevUntil {
|
||||
f = func(n *html.Node) bool {
|
||||
if untilm != nil {
|
||||
// Matcher-based condition
|
||||
sel := newSingleSelection(n, nil)
|
||||
return sel.IsMatcher(untilm)
|
||||
} else if len(untilNodes) > 0 {
|
||||
// Nodes-based condition
|
||||
sel := newSingleSelection(n, nil)
|
||||
return sel.IsNodes(untilNodes...)
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
|
||||
return getChildrenWithSiblingType(n.Parent, st, n, f)
|
||||
})
|
||||
}
|
||||
|
||||
// Gets the children nodes of each node in the specified slice of nodes,
|
||||
// based on the sibling type request.
|
||||
func getChildrenNodes(nodes []*html.Node, st siblingType) []*html.Node {
|
||||
return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
|
||||
return getChildrenWithSiblingType(n, st, nil, nil)
|
||||
})
|
||||
}
|
||||
|
||||
// Gets the children of the specified parent, based on the requested sibling
|
||||
// type, skipping a specified node if required.
|
||||
func getChildrenWithSiblingType(parent *html.Node, st siblingType, skipNode *html.Node,
|
||||
untilFunc func(*html.Node) bool) (result []*html.Node) {
|
||||
|
||||
// Create the iterator function
|
||||
var iter = func(cur *html.Node) (ret *html.Node) {
|
||||
// Based on the sibling type requested, iterate the right way
|
||||
for {
|
||||
switch st {
|
||||
case siblingAll, siblingAllIncludingNonElements:
|
||||
if cur == nil {
|
||||
// First iteration, start with first child of parent
|
||||
// Skip node if required
|
||||
if ret = parent.FirstChild; ret == skipNode && skipNode != nil {
|
||||
ret = skipNode.NextSibling
|
||||
}
|
||||
} else {
|
||||
// Skip node if required
|
||||
if ret = cur.NextSibling; ret == skipNode && skipNode != nil {
|
||||
ret = skipNode.NextSibling
|
||||
}
|
||||
}
|
||||
case siblingPrev, siblingPrevAll, siblingPrevUntil:
|
||||
if cur == nil {
|
||||
// Start with previous sibling of the skip node
|
||||
ret = skipNode.PrevSibling
|
||||
} else {
|
||||
ret = cur.PrevSibling
|
||||
}
|
||||
case siblingNext, siblingNextAll, siblingNextUntil:
|
||||
if cur == nil {
|
||||
// Start with next sibling of the skip node
|
||||
ret = skipNode.NextSibling
|
||||
} else {
|
||||
ret = cur.NextSibling
|
||||
}
|
||||
default:
|
||||
panic("Invalid sibling type.")
|
||||
}
|
||||
if ret == nil || ret.Type == html.ElementNode || st == siblingAllIncludingNonElements {
|
||||
return
|
||||
}
|
||||
// Not a valid node, try again from this one
|
||||
cur = ret
|
||||
}
|
||||
}
|
||||
|
||||
for c := iter(nil); c != nil; c = iter(c) {
|
||||
// If this is an ...Until case, test before append (returns true
|
||||
// if the until condition is reached)
|
||||
if st == siblingNextUntil || st == siblingPrevUntil {
|
||||
if untilFunc(c) {
|
||||
return
|
||||
}
|
||||
}
|
||||
result = append(result, c)
|
||||
if st == siblingNext || st == siblingPrev {
|
||||
// Only one node was requested (immediate next or previous), so exit
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Internal implementation of parent nodes that return a raw slice of Nodes.
|
||||
func getParentNodes(nodes []*html.Node) []*html.Node {
|
||||
return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
|
||||
if n.Parent != nil && n.Parent.Type == html.ElementNode {
|
||||
return []*html.Node{n.Parent}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// Internal map function used by many traversing methods. Takes the source nodes
|
||||
// to iterate on and the mapping function that returns an array of nodes.
|
||||
// Returns an array of nodes mapped by calling the callback function once for
|
||||
// each node in the source nodes.
|
||||
func mapNodes(nodes []*html.Node, f func(int, *html.Node) []*html.Node) (result []*html.Node) {
|
||||
set := make(map[*html.Node]bool)
|
||||
for i, n := range nodes {
|
||||
if vals := f(i, n); len(vals) > 0 {
|
||||
result = appendWithoutDuplicates(result, vals, set)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
|
@ -0,0 +1,141 @@
|
|||
package goquery
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/andybalholm/cascadia"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Document represents an HTML document to be manipulated. Unlike jQuery, which
|
||||
// is loaded as part of a DOM document, and thus acts upon its containing
|
||||
// document, GoQuery doesn't know which HTML document to act upon. So it needs
|
||||
// to be told, and that's what the Document class is for. It holds the root
|
||||
// document node to manipulate, and can make selections on this document.
|
||||
type Document struct {
|
||||
*Selection
|
||||
Url *url.URL
|
||||
rootNode *html.Node
|
||||
}
|
||||
|
||||
// NewDocumentFromNode is a Document constructor that takes a root html Node
|
||||
// as argument.
|
||||
func NewDocumentFromNode(root *html.Node) *Document {
|
||||
return newDocument(root, nil)
|
||||
}
|
||||
|
||||
// NewDocument is a Document constructor that takes a string URL as argument.
|
||||
// It loads the specified document, parses it, and stores the root Document
|
||||
// node, ready to be manipulated.
|
||||
//
|
||||
// Deprecated: Use the net/http standard library package to make the request
|
||||
// and validate the response before calling goquery.NewDocumentFromReader
|
||||
// with the response's body.
|
||||
func NewDocument(url string) (*Document, error) {
|
||||
// Load the URL
|
||||
res, e := http.Get(url)
|
||||
if e != nil {
|
||||
return nil, e
|
||||
}
|
||||
return NewDocumentFromResponse(res)
|
||||
}
|
||||
|
||||
// NewDocumentFromReader returns a Document from an io.Reader.
|
||||
// It returns an error as second value if the reader's data cannot be parsed
|
||||
// as html. It does not check if the reader is also an io.Closer, the
|
||||
// provided reader is never closed by this call. It is the responsibility
|
||||
// of the caller to close it if required.
|
||||
func NewDocumentFromReader(r io.Reader) (*Document, error) {
|
||||
root, e := html.Parse(r)
|
||||
if e != nil {
|
||||
return nil, e
|
||||
}
|
||||
return newDocument(root, nil), nil
|
||||
}
|
||||
|
||||
// NewDocumentFromResponse is another Document constructor that takes an http response as argument.
|
||||
// It loads the specified response's document, parses it, and stores the root Document
|
||||
// node, ready to be manipulated. The response's body is closed on return.
|
||||
//
|
||||
// Deprecated: Use goquery.NewDocumentFromReader with the response's body.
|
||||
func NewDocumentFromResponse(res *http.Response) (*Document, error) {
|
||||
if res == nil {
|
||||
return nil, errors.New("Response is nil")
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.Request == nil {
|
||||
return nil, errors.New("Response.Request is nil")
|
||||
}
|
||||
|
||||
// Parse the HTML into nodes
|
||||
root, e := html.Parse(res.Body)
|
||||
if e != nil {
|
||||
return nil, e
|
||||
}
|
||||
|
||||
// Create and fill the document
|
||||
return newDocument(root, res.Request.URL), nil
|
||||
}
|
||||
|
||||
// CloneDocument creates a deep-clone of a document.
|
||||
func CloneDocument(doc *Document) *Document {
|
||||
return newDocument(cloneNode(doc.rootNode), doc.Url)
|
||||
}
|
||||
|
||||
// Private constructor, make sure all fields are correctly filled.
|
||||
func newDocument(root *html.Node, url *url.URL) *Document {
|
||||
// Create and fill the document
|
||||
d := &Document{nil, url, root}
|
||||
d.Selection = newSingleSelection(root, d)
|
||||
return d
|
||||
}
|
||||
|
||||
// Selection represents a collection of nodes matching some criteria. The
|
||||
// initial Selection can be created by using Document.Find, and then
|
||||
// manipulated using the jQuery-like chainable syntax and methods.
|
||||
type Selection struct {
|
||||
Nodes []*html.Node
|
||||
document *Document
|
||||
prevSel *Selection
|
||||
}
|
||||
|
||||
// Helper constructor to create an empty selection
|
||||
func newEmptySelection(doc *Document) *Selection {
|
||||
return &Selection{nil, doc, nil}
|
||||
}
|
||||
|
||||
// Helper constructor to create a selection of only one node
|
||||
func newSingleSelection(node *html.Node, doc *Document) *Selection {
|
||||
return &Selection{[]*html.Node{node}, doc, nil}
|
||||
}
|
||||
|
||||
// Matcher is an interface that defines the methods to match
|
||||
// HTML nodes against a compiled selector string. Cascadia's
|
||||
// Selector implements this interface.
|
||||
type Matcher interface {
|
||||
Match(*html.Node) bool
|
||||
MatchAll(*html.Node) []*html.Node
|
||||
Filter([]*html.Node) []*html.Node
|
||||
}
|
||||
|
||||
// compileMatcher compiles the selector string s and returns
|
||||
// the corresponding Matcher. If s is an invalid selector string,
|
||||
// it returns a Matcher that fails all matches.
|
||||
func compileMatcher(s string) Matcher {
|
||||
cs, err := cascadia.Compile(s)
|
||||
if err != nil {
|
||||
return invalidMatcher{}
|
||||
}
|
||||
return cs
|
||||
}
|
||||
|
||||
// invalidMatcher is a Matcher that always fails to match.
|
||||
type invalidMatcher struct{}
|
||||
|
||||
func (invalidMatcher) Match(n *html.Node) bool { return false }
|
||||
func (invalidMatcher) MatchAll(n *html.Node) []*html.Node { return nil }
|
||||
func (invalidMatcher) Filter(ns []*html.Node) []*html.Node { return nil }
|
|
@ -0,0 +1,161 @@
|
|||
package goquery
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// used to determine if a set (map[*html.Node]bool) should be used
|
||||
// instead of iterating over a slice. The set uses more memory and
|
||||
// is slower than slice iteration for small N.
|
||||
const minNodesForSet = 1000
|
||||
|
||||
var nodeNames = []string{
|
||||
html.ErrorNode: "#error",
|
||||
html.TextNode: "#text",
|
||||
html.DocumentNode: "#document",
|
||||
html.CommentNode: "#comment",
|
||||
}
|
||||
|
||||
// NodeName returns the node name of the first element in the selection.
|
||||
// It tries to behave in a similar way as the DOM's nodeName property
|
||||
// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
|
||||
//
|
||||
// Go's net/html package defines the following node types, listed with
|
||||
// the corresponding returned value from this function:
|
||||
//
|
||||
// ErrorNode : #error
|
||||
// TextNode : #text
|
||||
// DocumentNode : #document
|
||||
// ElementNode : the element's tag name
|
||||
// CommentNode : #comment
|
||||
// DoctypeNode : the name of the document type
|
||||
//
|
||||
func NodeName(s *Selection) string {
|
||||
if s.Length() == 0 {
|
||||
return ""
|
||||
}
|
||||
switch n := s.Get(0); n.Type {
|
||||
case html.ElementNode, html.DoctypeNode:
|
||||
return n.Data
|
||||
default:
|
||||
if n.Type >= 0 && int(n.Type) < len(nodeNames) {
|
||||
return nodeNames[n.Type]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// OuterHtml returns the outer HTML rendering of the first item in
|
||||
// the selection - that is, the HTML including the first element's
|
||||
// tag and attributes.
|
||||
//
|
||||
// Unlike InnerHtml, this is a function and not a method on the Selection,
|
||||
// because this is not a jQuery method (in javascript-land, this is
|
||||
// a property provided by the DOM).
|
||||
func OuterHtml(s *Selection) (string, error) {
|
||||
var buf bytes.Buffer
|
||||
|
||||
if s.Length() == 0 {
|
||||
return "", nil
|
||||
}
|
||||
n := s.Get(0)
|
||||
if err := html.Render(&buf, n); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
// Loop through all container nodes to search for the target node.
|
||||
func sliceContains(container []*html.Node, contained *html.Node) bool {
|
||||
for _, n := range container {
|
||||
if nodeContains(n, contained) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Checks if the contained node is within the container node.
|
||||
func nodeContains(container *html.Node, contained *html.Node) bool {
|
||||
// Check if the parent of the contained node is the container node, traversing
|
||||
// upward until the top is reached, or the container is found.
|
||||
for contained = contained.Parent; contained != nil; contained = contained.Parent {
|
||||
if container == contained {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Checks if the target node is in the slice of nodes.
|
||||
func isInSlice(slice []*html.Node, node *html.Node) bool {
|
||||
return indexInSlice(slice, node) > -1
|
||||
}
|
||||
|
||||
// Returns the index of the target node in the slice, or -1.
|
||||
func indexInSlice(slice []*html.Node, node *html.Node) int {
|
||||
if node != nil {
|
||||
for i, n := range slice {
|
||||
if n == node {
|
||||
return i
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// Appends the new nodes to the target slice, making sure no duplicate is added.
|
||||
// There is no check to the original state of the target slice, so it may still
|
||||
// contain duplicates. The target slice is returned because append() may create
|
||||
// a new underlying array. If targetSet is nil, a local set is created with the
|
||||
// target if len(target) + len(nodes) is greater than minNodesForSet.
|
||||
func appendWithoutDuplicates(target []*html.Node, nodes []*html.Node, targetSet map[*html.Node]bool) []*html.Node {
|
||||
// if there are not that many nodes, don't use the map, faster to just use nested loops
|
||||
// (unless a non-nil targetSet is passed, in which case the caller knows better).
|
||||
if targetSet == nil && len(target)+len(nodes) < minNodesForSet {
|
||||
for _, n := range nodes {
|
||||
if !isInSlice(target, n) {
|
||||
target = append(target, n)
|
||||
}
|
||||
}
|
||||
return target
|
||||
}
|
||||
|
||||
// if a targetSet is passed, then assume it is reliable, otherwise create one
|
||||
// and initialize it with the current target contents.
|
||||
if targetSet == nil {
|
||||
targetSet = make(map[*html.Node]bool, len(target))
|
||||
for _, n := range target {
|
||||
targetSet[n] = true
|
||||
}
|
||||
}
|
||||
for _, n := range nodes {
|
||||
if !targetSet[n] {
|
||||
target = append(target, n)
|
||||
targetSet[n] = true
|
||||
}
|
||||
}
|
||||
|
||||
return target
|
||||
}
|
||||
|
||||
// Loop through a selection, returning only those nodes that pass the predicate
|
||||
// function.
|
||||
func grep(sel *Selection, predicate func(i int, s *Selection) bool) (result []*html.Node) {
|
||||
for i, n := range sel.Nodes {
|
||||
if predicate(i, newSingleSelection(n, sel.document)) {
|
||||
result = append(result, n)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Creates a new Selection object based on the specified nodes, and keeps the
|
||||
// source Selection object on the stack (linked list).
|
||||
func pushStack(fromSel *Selection, nodes []*html.Node) *Selection {
|
||||
result := &Selection{nodes, fromSel.document, fromSel}
|
||||
return result
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.3
|
||||
- 1.4
|
||||
|
||||
install:
|
||||
- go get github.com/andybalholm/cascadia
|
||||
|
||||
script:
|
||||
- go test -v
|
||||
|
||||
notifications:
|
||||
email: false
|
|
@ -0,0 +1,24 @@
|
|||
Copyright (c) 2011 Andy Balholm. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,7 @@
|
|||
# cascadia
|
||||
|
||||
[![](https://travis-ci.org/andybalholm/cascadia.svg)](https://travis-ci.org/andybalholm/cascadia)
|
||||
|
||||
The Cascadia package implements CSS selectors for use with the parse trees produced by the html package.
|
||||
|
||||
To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package.
|
|
@ -0,0 +1,3 @@
|
|||
module "github.com/andybalholm/cascadia"
|
||||
|
||||
require "golang.org/x/net" v0.0.0-20180218175443-cbe0f9307d01
|
|
@ -0,0 +1,835 @@
|
|||
// Package cascadia is an implementation of CSS selectors.
|
||||
package cascadia
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// a parser for CSS selectors
|
||||
type parser struct {
|
||||
s string // the source text
|
||||
i int // the current position
|
||||
}
|
||||
|
||||
// parseEscape parses a backslash escape.
|
||||
func (p *parser) parseEscape() (result string, err error) {
|
||||
if len(p.s) < p.i+2 || p.s[p.i] != '\\' {
|
||||
return "", errors.New("invalid escape sequence")
|
||||
}
|
||||
|
||||
start := p.i + 1
|
||||
c := p.s[start]
|
||||
switch {
|
||||
case c == '\r' || c == '\n' || c == '\f':
|
||||
return "", errors.New("escaped line ending outside string")
|
||||
case hexDigit(c):
|
||||
// unicode escape (hex)
|
||||
var i int
|
||||
for i = start; i < p.i+6 && i < len(p.s) && hexDigit(p.s[i]); i++ {
|
||||
// empty
|
||||
}
|
||||
v, _ := strconv.ParseUint(p.s[start:i], 16, 21)
|
||||
if len(p.s) > i {
|
||||
switch p.s[i] {
|
||||
case '\r':
|
||||
i++
|
||||
if len(p.s) > i && p.s[i] == '\n' {
|
||||
i++
|
||||
}
|
||||
case ' ', '\t', '\n', '\f':
|
||||
i++
|
||||
}
|
||||
}
|
||||
p.i = i
|
||||
return string(rune(v)), nil
|
||||
}
|
||||
|
||||
// Return the literal character after the backslash.
|
||||
result = p.s[start : start+1]
|
||||
p.i += 2
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func hexDigit(c byte) bool {
|
||||
return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
|
||||
}
|
||||
|
||||
// nameStart returns whether c can be the first character of an identifier
|
||||
// (not counting an initial hyphen, or an escape sequence).
|
||||
func nameStart(c byte) bool {
|
||||
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127
|
||||
}
|
||||
|
||||
// nameChar returns whether c can be a character within an identifier
|
||||
// (not counting an escape sequence).
|
||||
func nameChar(c byte) bool {
|
||||
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 ||
|
||||
c == '-' || '0' <= c && c <= '9'
|
||||
}
|
||||
|
||||
// parseIdentifier parses an identifier.
|
||||
func (p *parser) parseIdentifier() (result string, err error) {
|
||||
startingDash := false
|
||||
if len(p.s) > p.i && p.s[p.i] == '-' {
|
||||
startingDash = true
|
||||
p.i++
|
||||
}
|
||||
|
||||
if len(p.s) <= p.i {
|
||||
return "", errors.New("expected identifier, found EOF instead")
|
||||
}
|
||||
|
||||
if c := p.s[p.i]; !(nameStart(c) || c == '\\') {
|
||||
return "", fmt.Errorf("expected identifier, found %c instead", c)
|
||||
}
|
||||
|
||||
result, err = p.parseName()
|
||||
if startingDash && err == nil {
|
||||
result = "-" + result
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// parseName parses a name (which is like an identifier, but doesn't have
|
||||
// extra restrictions on the first character).
|
||||
func (p *parser) parseName() (result string, err error) {
|
||||
i := p.i
|
||||
loop:
|
||||
for i < len(p.s) {
|
||||
c := p.s[i]
|
||||
switch {
|
||||
case nameChar(c):
|
||||
start := i
|
||||
for i < len(p.s) && nameChar(p.s[i]) {
|
||||
i++
|
||||
}
|
||||
result += p.s[start:i]
|
||||
case c == '\\':
|
||||
p.i = i
|
||||
val, err := p.parseEscape()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
i = p.i
|
||||
result += val
|
||||
default:
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
if result == "" {
|
||||
return "", errors.New("expected name, found EOF instead")
|
||||
}
|
||||
|
||||
p.i = i
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseString parses a single- or double-quoted string.
|
||||
func (p *parser) parseString() (result string, err error) {
|
||||
i := p.i
|
||||
if len(p.s) < i+2 {
|
||||
return "", errors.New("expected string, found EOF instead")
|
||||
}
|
||||
|
||||
quote := p.s[i]
|
||||
i++
|
||||
|
||||
loop:
|
||||
for i < len(p.s) {
|
||||
switch p.s[i] {
|
||||
case '\\':
|
||||
if len(p.s) > i+1 {
|
||||
switch c := p.s[i+1]; c {
|
||||
case '\r':
|
||||
if len(p.s) > i+2 && p.s[i+2] == '\n' {
|
||||
i += 3
|
||||
continue loop
|
||||
}
|
||||
fallthrough
|
||||
case '\n', '\f':
|
||||
i += 2
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
p.i = i
|
||||
val, err := p.parseEscape()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
i = p.i
|
||||
result += val
|
||||
case quote:
|
||||
break loop
|
||||
case '\r', '\n', '\f':
|
||||
return "", errors.New("unexpected end of line in string")
|
||||
default:
|
||||
start := i
|
||||
for i < len(p.s) {
|
||||
if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' {
|
||||
break
|
||||
}
|
||||
i++
|
||||
}
|
||||
result += p.s[start:i]
|
||||
}
|
||||
}
|
||||
|
||||
if i >= len(p.s) {
|
||||
return "", errors.New("EOF in string")
|
||||
}
|
||||
|
||||
// Consume the final quote.
|
||||
i++
|
||||
|
||||
p.i = i
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseRegex parses a regular expression; the end is defined by encountering an
|
||||
// unmatched closing ')' or ']' which is not consumed
|
||||
func (p *parser) parseRegex() (rx *regexp.Regexp, err error) {
|
||||
i := p.i
|
||||
if len(p.s) < i+2 {
|
||||
return nil, errors.New("expected regular expression, found EOF instead")
|
||||
}
|
||||
|
||||
// number of open parens or brackets;
|
||||
// when it becomes negative, finished parsing regex
|
||||
open := 0
|
||||
|
||||
loop:
|
||||
for i < len(p.s) {
|
||||
switch p.s[i] {
|
||||
case '(', '[':
|
||||
open++
|
||||
case ')', ']':
|
||||
open--
|
||||
if open < 0 {
|
||||
break loop
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
||||
if i >= len(p.s) {
|
||||
return nil, errors.New("EOF in regular expression")
|
||||
}
|
||||
rx, err = regexp.Compile(p.s[p.i:i])
|
||||
p.i = i
|
||||
return rx, err
|
||||
}
|
||||
|
||||
// skipWhitespace consumes whitespace characters and comments.
|
||||
// It returns true if there was actually anything to skip.
|
||||
func (p *parser) skipWhitespace() bool {
|
||||
i := p.i
|
||||
for i < len(p.s) {
|
||||
switch p.s[i] {
|
||||
case ' ', '\t', '\r', '\n', '\f':
|
||||
i++
|
||||
continue
|
||||
case '/':
|
||||
if strings.HasPrefix(p.s[i:], "/*") {
|
||||
end := strings.Index(p.s[i+len("/*"):], "*/")
|
||||
if end != -1 {
|
||||
i += end + len("/**/")
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if i > p.i {
|
||||
p.i = i
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// consumeParenthesis consumes an opening parenthesis and any following
|
||||
// whitespace. It returns true if there was actually a parenthesis to skip.
|
||||
func (p *parser) consumeParenthesis() bool {
|
||||
if p.i < len(p.s) && p.s[p.i] == '(' {
|
||||
p.i++
|
||||
p.skipWhitespace()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// consumeClosingParenthesis consumes a closing parenthesis and any preceding
|
||||
// whitespace. It returns true if there was actually a parenthesis to skip.
|
||||
func (p *parser) consumeClosingParenthesis() bool {
|
||||
i := p.i
|
||||
p.skipWhitespace()
|
||||
if p.i < len(p.s) && p.s[p.i] == ')' {
|
||||
p.i++
|
||||
return true
|
||||
}
|
||||
p.i = i
|
||||
return false
|
||||
}
|
||||
|
||||
// parseTypeSelector parses a type selector (one that matches by tag name).
|
||||
func (p *parser) parseTypeSelector() (result Selector, err error) {
|
||||
tag, err := p.parseIdentifier()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return typeSelector(tag), nil
|
||||
}
|
||||
|
||||
// parseIDSelector parses a selector that matches by id attribute.
|
||||
func (p *parser) parseIDSelector() (Selector, error) {
|
||||
if p.i >= len(p.s) {
|
||||
return nil, fmt.Errorf("expected id selector (#id), found EOF instead")
|
||||
}
|
||||
if p.s[p.i] != '#' {
|
||||
return nil, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i])
|
||||
}
|
||||
|
||||
p.i++
|
||||
id, err := p.parseName()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return attributeEqualsSelector("id", id), nil
|
||||
}
|
||||
|
||||
// parseClassSelector parses a selector that matches by class attribute.
|
||||
func (p *parser) parseClassSelector() (Selector, error) {
|
||||
if p.i >= len(p.s) {
|
||||
return nil, fmt.Errorf("expected class selector (.class), found EOF instead")
|
||||
}
|
||||
if p.s[p.i] != '.' {
|
||||
return nil, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i])
|
||||
}
|
||||
|
||||
p.i++
|
||||
class, err := p.parseIdentifier()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return attributeIncludesSelector("class", class), nil
|
||||
}
|
||||
|
||||
// parseAttributeSelector parses a selector that matches by attribute value.
|
||||
func (p *parser) parseAttributeSelector() (Selector, error) {
|
||||
if p.i >= len(p.s) {
|
||||
return nil, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead")
|
||||
}
|
||||
if p.s[p.i] != '[' {
|
||||
return nil, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i])
|
||||
}
|
||||
|
||||
p.i++
|
||||
p.skipWhitespace()
|
||||
key, err := p.parseIdentifier()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p.skipWhitespace()
|
||||
if p.i >= len(p.s) {
|
||||
return nil, errors.New("unexpected EOF in attribute selector")
|
||||
}
|
||||
|
||||
if p.s[p.i] == ']' {
|
||||
p.i++
|
||||
return attributeExistsSelector(key), nil
|
||||
}
|
||||
|
||||
if p.i+2 >= len(p.s) {
|
||||
return nil, errors.New("unexpected EOF in attribute selector")
|
||||
}
|
||||
|
||||
op := p.s[p.i : p.i+2]
|
||||
if op[0] == '=' {
|
||||
op = "="
|
||||
} else if op[1] != '=' {
|
||||
return nil, fmt.Errorf(`expected equality operator, found "%s" instead`, op)
|
||||
}
|
||||
p.i += len(op)
|
||||
|
||||
p.skipWhitespace()
|
||||
if p.i >= len(p.s) {
|
||||
return nil, errors.New("unexpected EOF in attribute selector")
|
||||
}
|
||||
var val string
|
||||
var rx *regexp.Regexp
|
||||
if op == "#=" {
|
||||
rx, err = p.parseRegex()
|
||||
} else {
|
||||
switch p.s[p.i] {
|
||||
case '\'', '"':
|
||||
val, err = p.parseString()
|
||||
default:
|
||||
val, err = p.parseIdentifier()
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
p.skipWhitespace()
|
||||
if p.i >= len(p.s) {
|
||||
return nil, errors.New("unexpected EOF in attribute selector")
|
||||
}
|
||||
if p.s[p.i] != ']' {
|
||||
return nil, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i])
|
||||
}
|
||||
p.i++
|
||||
|
||||
switch op {
|
||||
case "=":
|
||||
return attributeEqualsSelector(key, val), nil
|
||||
case "!=":
|
||||
return attributeNotEqualSelector(key, val), nil
|
||||
case "~=":
|
||||
return attributeIncludesSelector(key, val), nil
|
||||
case "|=":
|
||||
return attributeDashmatchSelector(key, val), nil
|
||||
case "^=":
|
||||
return attributePrefixSelector(key, val), nil
|
||||
case "$=":
|
||||
return attributeSuffixSelector(key, val), nil
|
||||
case "*=":
|
||||
return attributeSubstringSelector(key, val), nil
|
||||
case "#=":
|
||||
return attributeRegexSelector(key, rx), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("attribute operator %q is not supported", op)
|
||||
}
|
||||
|
||||
var errExpectedParenthesis = errors.New("expected '(' but didn't find it")
|
||||
var errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it")
|
||||
var errUnmatchedParenthesis = errors.New("unmatched '('")
|
||||
|
||||
// parsePseudoclassSelector parses a pseudoclass selector like :not(p).
|
||||
func (p *parser) parsePseudoclassSelector() (Selector, error) {
|
||||
if p.i >= len(p.s) {
|
||||
return nil, fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead")
|
||||
}
|
||||
if p.s[p.i] != ':' {
|
||||
return nil, fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i])
|
||||
}
|
||||
|
||||
p.i++
|
||||
name, err := p.parseIdentifier()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
name = toLowerASCII(name)
|
||||
|
||||
switch name {
|
||||
case "not", "has", "haschild":
|
||||
if !p.consumeParenthesis() {
|
||||
return nil, errExpectedParenthesis
|
||||
}
|
||||
sel, parseErr := p.parseSelectorGroup()
|
||||
if parseErr != nil {
|
||||
return nil, parseErr
|
||||
}
|
||||
if !p.consumeClosingParenthesis() {
|
||||
return nil, errExpectedClosingParenthesis
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "not":
|
||||
return negatedSelector(sel), nil
|
||||
case "has":
|
||||
return hasDescendantSelector(sel), nil
|
||||
case "haschild":
|
||||
return hasChildSelector(sel), nil
|
||||
}
|
||||
|
||||
case "contains", "containsown":
|
||||
if !p.consumeParenthesis() {
|
||||
return nil, errExpectedParenthesis
|
||||
}
|
||||
if p.i == len(p.s) {
|
||||
return nil, errUnmatchedParenthesis
|
||||
}
|
||||
var val string
|
||||
switch p.s[p.i] {
|
||||
case '\'', '"':
|
||||
val, err = p.parseString()
|
||||
default:
|
||||
val, err = p.parseIdentifier()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
val = strings.ToLower(val)
|
||||
p.skipWhitespace()
|
||||
if p.i >= len(p.s) {
|
||||
return nil, errors.New("unexpected EOF in pseudo selector")
|
||||
}
|
||||
if !p.consumeClosingParenthesis() {
|
||||
return nil, errExpectedClosingParenthesis
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "contains":
|
||||
return textSubstrSelector(val), nil
|
||||
case "containsown":
|
||||
return ownTextSubstrSelector(val), nil
|
||||
}
|
||||
|
||||
case "matches", "matchesown":
|
||||
if !p.consumeParenthesis() {
|
||||
return nil, errExpectedParenthesis
|
||||
}
|
||||
rx, err := p.parseRegex()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if p.i >= len(p.s) {
|
||||
return nil, errors.New("unexpected EOF in pseudo selector")
|
||||
}
|
||||
if !p.consumeClosingParenthesis() {
|
||||
return nil, errExpectedClosingParenthesis
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "matches":
|
||||
return textRegexSelector(rx), nil
|
||||
case "matchesown":
|
||||
return ownTextRegexSelector(rx), nil
|
||||
}
|
||||
|
||||
case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type":
|
||||
if !p.consumeParenthesis() {
|
||||
return nil, errExpectedParenthesis
|
||||
}
|
||||
a, b, err := p.parseNth()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !p.consumeClosingParenthesis() {
|
||||
return nil, errExpectedClosingParenthesis
|
||||
}
|
||||
if a == 0 {
|
||||
switch name {
|
||||
case "nth-child":
|
||||
return simpleNthChildSelector(b, false), nil
|
||||
case "nth-of-type":
|
||||
return simpleNthChildSelector(b, true), nil
|
||||
case "nth-last-child":
|
||||
return simpleNthLastChildSelector(b, false), nil
|
||||
case "nth-last-of-type":
|
||||
return simpleNthLastChildSelector(b, true), nil
|
||||
}
|
||||
}
|
||||
return nthChildSelector(a, b,
|
||||
name == "nth-last-child" || name == "nth-last-of-type",
|
||||
name == "nth-of-type" || name == "nth-last-of-type"),
|
||||
nil
|
||||
|
||||
case "first-child":
|
||||
return simpleNthChildSelector(1, false), nil
|
||||
case "last-child":
|
||||
return simpleNthLastChildSelector(1, false), nil
|
||||
case "first-of-type":
|
||||
return simpleNthChildSelector(1, true), nil
|
||||
case "last-of-type":
|
||||
return simpleNthLastChildSelector(1, true), nil
|
||||
case "only-child":
|
||||
return onlyChildSelector(false), nil
|
||||
case "only-of-type":
|
||||
return onlyChildSelector(true), nil
|
||||
case "input":
|
||||
return inputSelector, nil
|
||||
case "empty":
|
||||
return emptyElementSelector, nil
|
||||
case "root":
|
||||
return rootSelector, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unknown pseudoclass :%s", name)
|
||||
}
|
||||
|
||||
// parseInteger parses a decimal integer.
|
||||
func (p *parser) parseInteger() (int, error) {
|
||||
i := p.i
|
||||
start := i
|
||||
for i < len(p.s) && '0' <= p.s[i] && p.s[i] <= '9' {
|
||||
i++
|
||||
}
|
||||
if i == start {
|
||||
return 0, errors.New("expected integer, but didn't find it")
|
||||
}
|
||||
p.i = i
|
||||
|
||||
val, err := strconv.Atoi(p.s[start:i])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// parseNth parses the argument for :nth-child (normally of the form an+b).
|
||||
func (p *parser) parseNth() (a, b int, err error) {
|
||||
// initial state
|
||||
if p.i >= len(p.s) {
|
||||
goto eof
|
||||
}
|
||||
switch p.s[p.i] {
|
||||
case '-':
|
||||
p.i++
|
||||
goto negativeA
|
||||
case '+':
|
||||
p.i++
|
||||
goto positiveA
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
goto positiveA
|
||||
case 'n', 'N':
|
||||
a = 1
|
||||
p.i++
|
||||
goto readN
|
||||
case 'o', 'O', 'e', 'E':
|
||||
id, nameErr := p.parseName()
|
||||
if nameErr != nil {
|
||||
return 0, 0, nameErr
|
||||
}
|
||||
id = toLowerASCII(id)
|
||||
if id == "odd" {
|
||||
return 2, 1, nil
|
||||
}
|
||||
if id == "even" {
|
||||
return 2, 0, nil
|
||||
}
|
||||
return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id)
|
||||
default:
|
||||
goto invalid
|
||||
}
|
||||
|
||||
positiveA:
|
||||
if p.i >= len(p.s) {
|
||||
goto eof
|
||||
}
|
||||
switch p.s[p.i] {
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
a, err = p.parseInteger()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
goto readA
|
||||
case 'n', 'N':
|
||||
a = 1
|
||||
p.i++
|
||||
goto readN
|
||||
default:
|
||||
goto invalid
|
||||
}
|
||||
|
||||
negativeA:
|
||||
if p.i >= len(p.s) {
|
||||
goto eof
|
||||
}
|
||||
switch p.s[p.i] {
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||||
a, err = p.parseInteger()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
a = -a
|
||||
goto readA
|
||||
case 'n', 'N':
|
||||
a = -1
|
||||
p.i++
|
||||
goto readN
|
||||
default:
|
||||
goto invalid
|
||||
}
|
||||
|
||||
readA:
|
||||
if p.i >= len(p.s) {
|
||||
goto eof
|
||||
}
|
||||
switch p.s[p.i] {
|
||||
case 'n', 'N':
|
||||
p.i++
|
||||
goto readN
|
||||
default:
|
||||
// The number we read as a is actually b.
|
||||
return 0, a, nil
|
||||
}
|
||||
|
||||
readN:
|
||||
p.skipWhitespace()
|
||||
if p.i >= len(p.s) {
|
||||
goto eof
|
||||
}
|
||||
switch p.s[p.i] {
|
||||
case '+':
|
||||
p.i++
|
||||
p.skipWhitespace()
|
||||
b, err = p.parseInteger()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return a, b, nil
|
||||
case '-':
|
||||
p.i++
|
||||
p.skipWhitespace()
|
||||
b, err = p.parseInteger()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return a, -b, nil
|
||||
default:
|
||||
return a, 0, nil
|
||||
}
|
||||
|
||||
eof:
|
||||
return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b")
|
||||
|
||||
invalid:
|
||||
return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b")
|
||||
}
|
||||
|
||||
// parseSimpleSelectorSequence parses a selector sequence that applies to
|
||||
// a single element.
|
||||
func (p *parser) parseSimpleSelectorSequence() (Selector, error) {
|
||||
var result Selector
|
||||
|
||||
if p.i >= len(p.s) {
|
||||
return nil, errors.New("expected selector, found EOF instead")
|
||||
}
|
||||
|
||||
switch p.s[p.i] {
|
||||
case '*':
|
||||
// It's the universal selector. Just skip over it, since it doesn't affect the meaning.
|
||||
p.i++
|
||||
case '#', '.', '[', ':':
|
||||
// There's no type selector. Wait to process the other till the main loop.
|
||||
default:
|
||||
r, err := p.parseTypeSelector()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result = r
|
||||
}
|
||||
|
||||
loop:
|
||||
for p.i < len(p.s) {
|
||||
var ns Selector
|
||||
var err error
|
||||
switch p.s[p.i] {
|
||||
case '#':
|
||||
ns, err = p.parseIDSelector()
|
||||
case '.':
|
||||
ns, err = p.parseClassSelector()
|
||||
case '[':
|
||||
ns, err = p.parseAttributeSelector()
|
||||
case ':':
|
||||
ns, err = p.parsePseudoclassSelector()
|
||||
default:
|
||||
break loop
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result == nil {
|
||||
result = ns
|
||||
} else {
|
||||
result = intersectionSelector(result, ns)
|
||||
}
|
||||
}
|
||||
|
||||
if result == nil {
|
||||
result = func(n *html.Node) bool {
|
||||
return n.Type == html.ElementNode
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseSelector parses a selector that may include combinators.
|
||||
func (p *parser) parseSelector() (result Selector, err error) {
|
||||
p.skipWhitespace()
|
||||
result, err = p.parseSimpleSelectorSequence()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for {
|
||||
var combinator byte
|
||||
if p.skipWhitespace() {
|
||||
combinator = ' '
|
||||
}
|
||||
if p.i >= len(p.s) {
|
||||
return
|
||||
}
|
||||
|
||||
switch p.s[p.i] {
|
||||
case '+', '>', '~':
|
||||
combinator = p.s[p.i]
|
||||
p.i++
|
||||
p.skipWhitespace()
|
||||
case ',', ')':
|
||||
// These characters can't begin a selector, but they can legally occur after one.
|
||||
return
|
||||
}
|
||||
|
||||
if combinator == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
c, err := p.parseSimpleSelectorSequence()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch combinator {
|
||||
case ' ':
|
||||
result = descendantSelector(result, c)
|
||||
case '>':
|
||||
result = childSelector(result, c)
|
||||
case '+':
|
||||
result = siblingSelector(result, c, true)
|
||||
case '~':
|
||||
result = siblingSelector(result, c, false)
|
||||
}
|
||||
}
|
||||
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
// parseSelectorGroup parses a group of selectors, separated by commas.
|
||||
func (p *parser) parseSelectorGroup() (result Selector, err error) {
|
||||
result, err = p.parseSelector()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
for p.i < len(p.s) {
|
||||
if p.s[p.i] != ',' {
|
||||
return result, nil
|
||||
}
|
||||
p.i++
|
||||
c, err := p.parseSelector()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result = unionSelector(result, c)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
|
@ -0,0 +1,622 @@
|
|||
package cascadia
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// the Selector type, and functions for creating them
|
||||
|
||||
// A Selector is a function which tells whether a node matches or not.
|
||||
type Selector func(*html.Node) bool
|
||||
|
||||
// hasChildMatch returns whether n has any child that matches a.
|
||||
func hasChildMatch(n *html.Node, a Selector) bool {
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if a(c) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// hasDescendantMatch performs a depth-first search of n's descendants,
|
||||
// testing whether any of them match a. It returns true as soon as a match is
|
||||
// found, or false if no match is found.
|
||||
func hasDescendantMatch(n *html.Node, a Selector) bool {
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if a(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Compile parses a selector and returns, if successful, a Selector object
|
||||
// that can be used to match against html.Node objects.
|
||||
func Compile(sel string) (Selector, error) {
|
||||
p := &parser{s: sel}
|
||||
compiled, err := p.parseSelectorGroup()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if p.i < len(sel) {
|
||||
return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
|
||||
}
|
||||
|
||||
return compiled, nil
|
||||
}
|
||||
|
||||
// MustCompile is like Compile, but panics instead of returning an error.
|
||||
func MustCompile(sel string) Selector {
|
||||
compiled, err := Compile(sel)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return compiled
|
||||
}
|
||||
|
||||
// MatchAll returns a slice of the nodes that match the selector,
|
||||
// from n and its children.
|
||||
func (s Selector) MatchAll(n *html.Node) []*html.Node {
|
||||
return s.matchAllInto(n, nil)
|
||||
}
|
||||
|
||||
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
|
||||
if s(n) {
|
||||
storage = append(storage, n)
|
||||
}
|
||||
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
storage = s.matchAllInto(child, storage)
|
||||
}
|
||||
|
||||
return storage
|
||||
}
|
||||
|
||||
// Match returns true if the node matches the selector.
|
||||
func (s Selector) Match(n *html.Node) bool {
|
||||
return s(n)
|
||||
}
|
||||
|
||||
// MatchFirst returns the first node that matches s, from n and its children.
|
||||
func (s Selector) MatchFirst(n *html.Node) *html.Node {
|
||||
if s.Match(n) {
|
||||
return n
|
||||
}
|
||||
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
m := s.MatchFirst(c)
|
||||
if m != nil {
|
||||
return m
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Filter returns the nodes in nodes that match the selector.
|
||||
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
|
||||
for _, n := range nodes {
|
||||
if s(n) {
|
||||
result = append(result, n)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// typeSelector returns a Selector that matches elements with a given tag name.
|
||||
func typeSelector(tag string) Selector {
|
||||
tag = toLowerASCII(tag)
|
||||
return func(n *html.Node) bool {
|
||||
return n.Type == html.ElementNode && n.Data == tag
|
||||
}
|
||||
}
|
||||
|
||||
// toLowerASCII returns s with all ASCII capital letters lowercased.
|
||||
func toLowerASCII(s string) string {
|
||||
var b []byte
|
||||
for i := 0; i < len(s); i++ {
|
||||
if c := s[i]; 'A' <= c && c <= 'Z' {
|
||||
if b == nil {
|
||||
b = make([]byte, len(s))
|
||||
copy(b, s)
|
||||
}
|
||||
b[i] = s[i] + ('a' - 'A')
|
||||
}
|
||||
}
|
||||
|
||||
if b == nil {
|
||||
return s
|
||||
}
|
||||
|
||||
return string(b)
|
||||
}
|
||||
|
||||
// attributeSelector returns a Selector that matches elements
|
||||
// where the attribute named key satisifes the function f.
|
||||
func attributeSelector(key string, f func(string) bool) Selector {
|
||||
key = toLowerASCII(key)
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == key && f(a.Val) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// attributeExistsSelector returns a Selector that matches elements that have
|
||||
// an attribute named key.
|
||||
func attributeExistsSelector(key string) Selector {
|
||||
return attributeSelector(key, func(string) bool { return true })
|
||||
}
|
||||
|
||||
// attributeEqualsSelector returns a Selector that matches elements where
|
||||
// the attribute named key has the value val.
|
||||
func attributeEqualsSelector(key, val string) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
return s == val
|
||||
})
|
||||
}
|
||||
|
||||
// attributeNotEqualSelector returns a Selector that matches elements where
|
||||
// the attribute named key does not have the value val.
|
||||
func attributeNotEqualSelector(key, val string) Selector {
|
||||
key = toLowerASCII(key)
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == key && a.Val == val {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// attributeIncludesSelector returns a Selector that matches elements where
|
||||
// the attribute named key is a whitespace-separated list that includes val.
|
||||
func attributeIncludesSelector(key, val string) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
for s != "" {
|
||||
i := strings.IndexAny(s, " \t\r\n\f")
|
||||
if i == -1 {
|
||||
return s == val
|
||||
}
|
||||
if s[:i] == val {
|
||||
return true
|
||||
}
|
||||
s = s[i+1:]
|
||||
}
|
||||
return false
|
||||
})
|
||||
}
|
||||
|
||||
// attributeDashmatchSelector returns a Selector that matches elements where
|
||||
// the attribute named key equals val or starts with val plus a hyphen.
|
||||
func attributeDashmatchSelector(key, val string) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
if s == val {
|
||||
return true
|
||||
}
|
||||
if len(s) <= len(val) {
|
||||
return false
|
||||
}
|
||||
if s[:len(val)] == val && s[len(val)] == '-' {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
})
|
||||
}
|
||||
|
||||
// attributePrefixSelector returns a Selector that matches elements where
|
||||
// the attribute named key starts with val.
|
||||
func attributePrefixSelector(key, val string) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
if strings.TrimSpace(s) == "" {
|
||||
return false
|
||||
}
|
||||
return strings.HasPrefix(s, val)
|
||||
})
|
||||
}
|
||||
|
||||
// attributeSuffixSelector returns a Selector that matches elements where
|
||||
// the attribute named key ends with val.
|
||||
func attributeSuffixSelector(key, val string) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
if strings.TrimSpace(s) == "" {
|
||||
return false
|
||||
}
|
||||
return strings.HasSuffix(s, val)
|
||||
})
|
||||
}
|
||||
|
||||
// attributeSubstringSelector returns a Selector that matches nodes where
|
||||
// the attribute named key contains val.
|
||||
func attributeSubstringSelector(key, val string) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
if strings.TrimSpace(s) == "" {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(s, val)
|
||||
})
|
||||
}
|
||||
|
||||
// attributeRegexSelector returns a Selector that matches nodes where
|
||||
// the attribute named key matches the regular expression rx
|
||||
func attributeRegexSelector(key string, rx *regexp.Regexp) Selector {
|
||||
return attributeSelector(key,
|
||||
func(s string) bool {
|
||||
return rx.MatchString(s)
|
||||
})
|
||||
}
|
||||
|
||||
// intersectionSelector returns a selector that matches nodes that match
|
||||
// both a and b.
|
||||
func intersectionSelector(a, b Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
return a(n) && b(n)
|
||||
}
|
||||
}
|
||||
|
||||
// unionSelector returns a selector that matches elements that match
|
||||
// either a or b.
|
||||
func unionSelector(a, b Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
return a(n) || b(n)
|
||||
}
|
||||
}
|
||||
|
||||
// negatedSelector returns a selector that matches elements that do not match a.
|
||||
func negatedSelector(a Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
return !a(n)
|
||||
}
|
||||
}
|
||||
|
||||
// writeNodeText writes the text contained in n and its descendants to b.
|
||||
func writeNodeText(n *html.Node, b *bytes.Buffer) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
b.WriteString(n.Data)
|
||||
case html.ElementNode:
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
writeNodeText(c, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nodeText returns the text contained in n and its descendants.
|
||||
func nodeText(n *html.Node) string {
|
||||
var b bytes.Buffer
|
||||
writeNodeText(n, &b)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// nodeOwnText returns the contents of the text nodes that are direct
|
||||
// children of n.
|
||||
func nodeOwnText(n *html.Node) string {
|
||||
var b bytes.Buffer
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type == html.TextNode {
|
||||
b.WriteString(c.Data)
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// textSubstrSelector returns a selector that matches nodes that
|
||||
// contain the given text.
|
||||
func textSubstrSelector(val string) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
text := strings.ToLower(nodeText(n))
|
||||
return strings.Contains(text, val)
|
||||
}
|
||||
}
|
||||
|
||||
// ownTextSubstrSelector returns a selector that matches nodes that
|
||||
// directly contain the given text
|
||||
func ownTextSubstrSelector(val string) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
text := strings.ToLower(nodeOwnText(n))
|
||||
return strings.Contains(text, val)
|
||||
}
|
||||
}
|
||||
|
||||
// textRegexSelector returns a selector that matches nodes whose text matches
|
||||
// the specified regular expression
|
||||
func textRegexSelector(rx *regexp.Regexp) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
return rx.MatchString(nodeText(n))
|
||||
}
|
||||
}
|
||||
|
||||
// ownTextRegexSelector returns a selector that matches nodes whose text
|
||||
// directly matches the specified regular expression
|
||||
func ownTextRegexSelector(rx *regexp.Regexp) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
return rx.MatchString(nodeOwnText(n))
|
||||
}
|
||||
}
|
||||
|
||||
// hasChildSelector returns a selector that matches elements
|
||||
// with a child that matches a.
|
||||
func hasChildSelector(a Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
return hasChildMatch(n, a)
|
||||
}
|
||||
}
|
||||
|
||||
// hasDescendantSelector returns a selector that matches elements
|
||||
// with any descendant that matches a.
|
||||
func hasDescendantSelector(a Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
return hasDescendantMatch(n, a)
|
||||
}
|
||||
}
|
||||
|
||||
// nthChildSelector returns a selector that implements :nth-child(an+b).
|
||||
// If last is true, implements :nth-last-child instead.
|
||||
// If ofType is true, implements :nth-of-type instead.
|
||||
func nthChildSelector(a, b int, last, ofType bool) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
parent := n.Parent
|
||||
if parent == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if parent.Type == html.DocumentNode {
|
||||
return false
|
||||
}
|
||||
|
||||
i := -1
|
||||
count := 0
|
||||
for c := parent.FirstChild; c != nil; c = c.NextSibling {
|
||||
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if c == n {
|
||||
i = count
|
||||
if !last {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if i == -1 {
|
||||
// This shouldn't happen, since n should always be one of its parent's children.
|
||||
return false
|
||||
}
|
||||
|
||||
if last {
|
||||
i = count - i + 1
|
||||
}
|
||||
|
||||
i -= b
|
||||
if a == 0 {
|
||||
return i == 0
|
||||
}
|
||||
|
||||
return i%a == 0 && i/a >= 0
|
||||
}
|
||||
}
|
||||
|
||||
// simpleNthChildSelector returns a selector that implements :nth-child(b).
|
||||
// If ofType is true, implements :nth-of-type instead.
|
||||
func simpleNthChildSelector(b int, ofType bool) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
parent := n.Parent
|
||||
if parent == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if parent.Type == html.DocumentNode {
|
||||
return false
|
||||
}
|
||||
|
||||
count := 0
|
||||
for c := parent.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if c == n {
|
||||
return count == b
|
||||
}
|
||||
if count >= b {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// simpleNthLastChildSelector returns a selector that implements
|
||||
// :nth-last-child(b). If ofType is true, implements :nth-last-of-type
|
||||
// instead.
|
||||
func simpleNthLastChildSelector(b int, ofType bool) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
parent := n.Parent
|
||||
if parent == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if parent.Type == html.DocumentNode {
|
||||
return false
|
||||
}
|
||||
|
||||
count := 0
|
||||
for c := parent.LastChild; c != nil; c = c.PrevSibling {
|
||||
if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if c == n {
|
||||
return count == b
|
||||
}
|
||||
if count >= b {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// onlyChildSelector returns a selector that implements :only-child.
|
||||
// If ofType is true, it implements :only-of-type instead.
|
||||
func onlyChildSelector(ofType bool) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
parent := n.Parent
|
||||
if parent == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if parent.Type == html.DocumentNode {
|
||||
return false
|
||||
}
|
||||
|
||||
count := 0
|
||||
for c := parent.FirstChild; c != nil; c = c.NextSibling {
|
||||
if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if count > 1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return count == 1
|
||||
}
|
||||
}
|
||||
|
||||
// inputSelector is a Selector that matches input, select, textarea and button elements.
|
||||
func inputSelector(n *html.Node) bool {
|
||||
return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
|
||||
}
|
||||
|
||||
// emptyElementSelector is a Selector that matches empty elements.
|
||||
func emptyElementSelector(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
switch c.Type {
|
||||
case html.ElementNode, html.TextNode:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// descendantSelector returns a Selector that matches an element if
|
||||
// it matches d and has an ancestor that matches a.
|
||||
func descendantSelector(a, d Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if !d(n) {
|
||||
return false
|
||||
}
|
||||
|
||||
for p := n.Parent; p != nil; p = p.Parent {
|
||||
if a(p) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// childSelector returns a Selector that matches an element if
|
||||
// it matches d and its parent matches a.
|
||||
func childSelector(a, d Selector) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
return d(n) && n.Parent != nil && a(n.Parent)
|
||||
}
|
||||
}
|
||||
|
||||
// siblingSelector returns a Selector that matches an element
|
||||
// if it matches s2 and in is preceded by an element that matches s1.
|
||||
// If adjacent is true, the sibling must be immediately before the element.
|
||||
func siblingSelector(s1, s2 Selector, adjacent bool) Selector {
|
||||
return func(n *html.Node) bool {
|
||||
if !s2(n) {
|
||||
return false
|
||||
}
|
||||
|
||||
if adjacent {
|
||||
for n = n.PrevSibling; n != nil; n = n.PrevSibling {
|
||||
if n.Type == html.TextNode || n.Type == html.CommentNode {
|
||||
continue
|
||||
}
|
||||
return s1(n)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Walk backwards looking for element that matches s1
|
||||
for c := n.PrevSibling; c != nil; c = c.PrevSibling {
|
||||
if s1(c) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// rootSelector implements :root
|
||||
func rootSelector(n *html.Node) bool {
|
||||
if n.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
if n.Parent == nil {
|
||||
return false
|
||||
}
|
||||
return n.Parent.Type == html.DocumentNode
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
|
@ -0,0 +1,15 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.6
|
||||
- 1.7
|
||||
- 1.8
|
||||
|
||||
install:
|
||||
- go get golang.org/x/net/html/charset
|
||||
- go get golang.org/x/net/html
|
||||
- go get github.com/antchfx/xpath
|
||||
- go get github.com/mattn/goveralls
|
||||
|
||||
script:
|
||||
- $HOME/gopath/bin/goveralls -service=travis-ci
|
|
@ -0,0 +1,17 @@
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -0,0 +1,102 @@
|
|||
htmlquery
|
||||
====
|
||||
[![Build Status](https://travis-ci.org/antchfx/htmlquery.svg?branch=master)](https://travis-ci.org/antchfx/htmlquery)
|
||||
[![Coverage Status](https://coveralls.io/repos/github/antchfx/htmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/htmlquery?branch=master)
|
||||
[![GoDoc](https://godoc.org/github.com/antchfx/htmlquery?status.svg)](https://godoc.org/github.com/antchfx/htmlquery)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/htmlquery)](https://goreportcard.com/report/github.com/antchfx/htmlquery)
|
||||
|
||||
Overview
|
||||
====
|
||||
|
||||
htmlquery is an XPath query package for HTML, lets you extract data or evaluate from HTML documents by an XPath expression.
|
||||
|
||||
Changelogs
|
||||
===
|
||||
|
||||
2019-02-04
|
||||
- [#7](https://github.com/antchfx/htmlquery/issues/7) Removed deprecated `FindEach()` and `FindEachWithBreak()` methods.
|
||||
|
||||
2018-12-28
|
||||
- Avoid adding duplicate elements to list for `Find()` method. [#6](https://github.com/antchfx/htmlquery/issues/6)
|
||||
|
||||
Installation
|
||||
====
|
||||
|
||||
> $ go get github.com/antchfx/htmlquery
|
||||
|
||||
Getting Started
|
||||
====
|
||||
|
||||
#### Load HTML document from URL.
|
||||
|
||||
```go
|
||||
doc, err := htmlquery.LoadURL("http://example.com/")
|
||||
```
|
||||
|
||||
#### Load HTML document from string.
|
||||
|
||||
```go
|
||||
s := `<html>....</html>`
|
||||
doc, err := htmlquery.Parse(strings.NewReader(s))
|
||||
```
|
||||
|
||||
#### Find all A elements.
|
||||
|
||||
```go
|
||||
list := htmlquery.Find(doc, "//a")
|
||||
```
|
||||
|
||||
#### Find all A elements that have `href` attribute.
|
||||
|
||||
```go
|
||||
list := range htmlquery.Find(doc, "//a[@href]")
|
||||
```
|
||||
|
||||
#### Find all A elements and only get `href` attribute self.
|
||||
|
||||
```go
|
||||
list := range htmlquery.Find(doc, "//a/@href")
|
||||
```
|
||||
|
||||
### Find the third A element.
|
||||
|
||||
```go
|
||||
a := htmlquery.FindOne(doc, "//a[3]")
|
||||
```
|
||||
|
||||
#### Evaluate the number of all IMG element.
|
||||
|
||||
```go
|
||||
expr, _ := xpath.Compile("count(//img)")
|
||||
v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
fmt.Printf("total count is %f", v)
|
||||
```
|
||||
|
||||
Quick Tutorial
|
||||
===
|
||||
|
||||
```go
|
||||
func main() {
|
||||
doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// Find all news item.
|
||||
for i, n := range htmlquery.Find(doc, "//ol/li") {
|
||||
a := htmlquery.FindOne(n, "//a")
|
||||
fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
List of supported XPath query packages
|
||||
===
|
||||
|Name |Description |
|
||||
|--------------------------|----------------|
|
||||
|[htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document|
|
||||
|[xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document|
|
||||
|[jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document|
|
||||
|
||||
Questions
|
||||
===
|
||||
Please let me know if you have any questions.
|
|
@ -0,0 +1,291 @@
|
|||
/*
|
||||
Package htmlquery provides extract data from HTML documents using XPath expression.
|
||||
*/
|
||||
package htmlquery
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
var _ xpath.NodeNavigator = &NodeNavigator{}
|
||||
|
||||
// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node.
|
||||
func CreateXPathNavigator(top *html.Node) *NodeNavigator {
|
||||
return &NodeNavigator{curr: top, root: top, attr: -1}
|
||||
}
|
||||
|
||||
// Find searches the html.Node that matches by the specified XPath expr.
|
||||
func Find(top *html.Node, expr string) []*html.Node {
|
||||
exp, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var elems []*html.Node
|
||||
t := exp.Select(CreateXPathNavigator(top))
|
||||
for t.MoveNext() {
|
||||
nav := t.Current().(*NodeNavigator)
|
||||
n := getCurrentNode(nav)
|
||||
// avoid adding duplicate nodes.
|
||||
if len(elems) > 0 && (elems[0] == n || (nav.NodeType() == xpath.AttributeNode &&
|
||||
nav.LocalName() == elems[0].Data && nav.Value() == InnerText(elems[0]))) {
|
||||
continue
|
||||
}
|
||||
elems = append(elems, n)
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
||||
// FindOne searches the html.Node that matches by the specified XPath expr,
|
||||
// and returns first element of matched html.Node.
|
||||
func FindOne(top *html.Node, expr string) *html.Node {
|
||||
var elem *html.Node
|
||||
exp, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
t := exp.Select(CreateXPathNavigator(top))
|
||||
if t.MoveNext() {
|
||||
elem = getCurrentNode(t.Current().(*NodeNavigator))
|
||||
}
|
||||
return elem
|
||||
}
|
||||
|
||||
// LoadURL loads the HTML document from the specified URL.
|
||||
func LoadURL(url string) (*html.Node, error) {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
func getCurrentNode(n *NodeNavigator) *html.Node {
|
||||
if n.NodeType() == xpath.AttributeNode {
|
||||
childNode := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: n.Value(),
|
||||
}
|
||||
return &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: n.LocalName(),
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
}
|
||||
|
||||
}
|
||||
return n.curr
|
||||
}
|
||||
|
||||
// Parse returns the parse tree for the HTML from the given Reader.
|
||||
func Parse(r io.Reader) (*html.Node, error) {
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
// InnerText returns the text between the start and end tags of the object.
|
||||
func InnerText(n *html.Node) string {
|
||||
var output func(*bytes.Buffer, *html.Node)
|
||||
output = func(buf *bytes.Buffer, n *html.Node) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
buf.WriteString(n.Data)
|
||||
return
|
||||
case html.CommentNode:
|
||||
return
|
||||
}
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
output(buf, child)
|
||||
}
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
output(&buf, n)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// SelectAttr returns the attribute value with the specified name.
|
||||
func SelectAttr(n *html.Node, name string) (val string) {
|
||||
if n == nil {
|
||||
return
|
||||
}
|
||||
if n.Type == html.ElementNode && n.Parent == nil && name == n.Data {
|
||||
return InnerText(n)
|
||||
}
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Key == name {
|
||||
val = attr.Val
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// OutputHTML returns the text including tags name.
|
||||
func OutputHTML(n *html.Node, self bool) string {
|
||||
var buf bytes.Buffer
|
||||
if self {
|
||||
html.Render(&buf, n)
|
||||
} else {
|
||||
for n := n.FirstChild; n != nil; n = n.NextSibling {
|
||||
html.Render(&buf, n)
|
||||
}
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
type NodeNavigator struct {
|
||||
root, curr *html.Node
|
||||
attr int
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Current() *html.Node {
|
||||
return h.curr
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) NodeType() xpath.NodeType {
|
||||
switch h.curr.Type {
|
||||
case html.CommentNode:
|
||||
return xpath.CommentNode
|
||||
case html.TextNode:
|
||||
return xpath.TextNode
|
||||
case html.DocumentNode:
|
||||
return xpath.RootNode
|
||||
case html.ElementNode:
|
||||
if h.attr != -1 {
|
||||
return xpath.AttributeNode
|
||||
}
|
||||
return xpath.ElementNode
|
||||
case html.DoctypeNode:
|
||||
// ignored <!DOCTYPE HTML> declare and as Root-Node type.
|
||||
return xpath.RootNode
|
||||
}
|
||||
panic(fmt.Sprintf("unknown HTML node type: %v", h.curr.Type))
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) LocalName() string {
|
||||
if h.attr != -1 {
|
||||
return h.curr.Attr[h.attr].Key
|
||||
}
|
||||
return h.curr.Data
|
||||
}
|
||||
|
||||
func (*NodeNavigator) Prefix() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Value() string {
|
||||
switch h.curr.Type {
|
||||
case html.CommentNode:
|
||||
return h.curr.Data
|
||||
case html.ElementNode:
|
||||
if h.attr != -1 {
|
||||
return h.curr.Attr[h.attr].Val
|
||||
}
|
||||
return InnerText(h.curr)
|
||||
case html.TextNode:
|
||||
return h.curr.Data
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Copy() xpath.NodeNavigator {
|
||||
n := *h
|
||||
return &n
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToRoot() {
|
||||
h.curr = h.root
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToParent() bool {
|
||||
if h.attr != -1 {
|
||||
h.attr = -1
|
||||
return true
|
||||
} else if node := h.curr.Parent; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToNextAttribute() bool {
|
||||
if h.attr >= len(h.curr.Attr)-1 {
|
||||
return false
|
||||
}
|
||||
h.attr++
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToChild() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.FirstChild; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToFirst() bool {
|
||||
if h.attr != -1 || h.curr.PrevSibling == nil {
|
||||
return false
|
||||
}
|
||||
for {
|
||||
node := h.curr.PrevSibling
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
h.curr = node
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) String() string {
|
||||
return h.Value()
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToNext() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.NextSibling; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToPrevious() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.PrevSibling; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool {
|
||||
node, ok := other.(*NodeNavigator)
|
||||
if !ok || node.root != h.root {
|
||||
return false
|
||||
}
|
||||
|
||||
h.curr = node.curr
|
||||
h.attr = node.attr
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
|
@ -0,0 +1,14 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.6
|
||||
- 1.7
|
||||
- 1.8
|
||||
|
||||
install:
|
||||
- go get golang.org/x/net/html/charset
|
||||
- go get github.com/antchfx/xpath
|
||||
- go get github.com/mattn/goveralls
|
||||
|
||||
script:
|
||||
- $HOME/gopath/bin/goveralls -service=travis-ci
|
|
@ -0,0 +1,17 @@
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -0,0 +1,186 @@
|
|||
xmlquery
|
||||
====
|
||||
[![Build Status](https://travis-ci.org/antchfx/xmlquery.svg?branch=master)](https://travis-ci.org/antchfx/xmlquery)
|
||||
[![Coverage Status](https://coveralls.io/repos/github/antchfx/xmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xmlquery?branch=master)
|
||||
[![GoDoc](https://godoc.org/github.com/antchfx/xmlquery?status.svg)](https://godoc.org/github.com/antchfx/xmlquery)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xmlquery)](https://goreportcard.com/report/github.com/antchfx/xmlquery)
|
||||
|
||||
Overview
|
||||
===
|
||||
|
||||
xmlquery is an XPath query package for XML document, lets you extract data or evaluate from XML documents by an XPath expression.
|
||||
|
||||
Change Logs
|
||||
===
|
||||
|
||||
**2018-12-23**
|
||||
* added XML output will including comment node. [#9](https://github.com/antchfx/xmlquery/issues/9)
|
||||
|
||||
**2018-12-03**
|
||||
* added support attribute name with namespace prefix and XML output. [#6](https://github.com/antchfx/xmlquery/issues/6)
|
||||
|
||||
Installation
|
||||
====
|
||||
|
||||
> $ go get github.com/antchfx/xmlquery
|
||||
|
||||
Getting Started
|
||||
===
|
||||
|
||||
#### Parse a XML from URL.
|
||||
|
||||
```go
|
||||
doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml")
|
||||
```
|
||||
|
||||
#### Parse a XML from string.
|
||||
|
||||
```go
|
||||
s := `<?xml version="1.0" encoding="utf-8"?><rss version="2.0"></rss>`
|
||||
doc, err := xmlquery.Parse(strings.NewReader(s))
|
||||
```
|
||||
|
||||
#### Parse a XML from io.Reader.
|
||||
|
||||
```go
|
||||
f, err := os.Open("../books.xml")
|
||||
doc, err := xmlquery.Parse(f)
|
||||
```
|
||||
|
||||
#### Find authors of all books in the bookstore.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc, "//book//author")
|
||||
// or
|
||||
list := xmlquery.Find(doc, "//author")
|
||||
```
|
||||
|
||||
#### Find the second book.
|
||||
|
||||
```go
|
||||
book := xmlquery.FindOne(doc, "//book[2]")
|
||||
```
|
||||
|
||||
#### Find all book elements and only get `id` attribute self. (New Feature)
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc,"//book/@id")
|
||||
```
|
||||
|
||||
#### Find all books with id is bk104.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc, "//book[@id='bk104']")
|
||||
```
|
||||
|
||||
#### Find all books that price less than 5.
|
||||
|
||||
```go
|
||||
list := xmlquery.Find(doc, "//book[price<5]")
|
||||
```
|
||||
|
||||
#### Evaluate the total price of all books.
|
||||
|
||||
```go
|
||||
expr, err := xpath.Compile("sum(//book/price)")
|
||||
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
fmt.Printf("total price: %f\n", price)
|
||||
```
|
||||
|
||||
#### Evaluate the number of all books element.
|
||||
|
||||
```go
|
||||
expr, err := xpath.Compile("count(//book)")
|
||||
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
```
|
||||
|
||||
#### Create XML document.
|
||||
|
||||
```go
|
||||
doc := &xmlquery.Node{
|
||||
Type: xmlquery.DeclarationNode,
|
||||
Data: "xml",
|
||||
Attr: []xml.Attr{
|
||||
xml.Attr{Name: xml.Name{Local: "version"}, Value: "1.0"},
|
||||
},
|
||||
}
|
||||
root := &xmlquery.Node{
|
||||
Data: "rss",
|
||||
Type: xmlquery.ElementNode,
|
||||
}
|
||||
doc.FirstChild = root
|
||||
channel := &xmlquery.Node{
|
||||
Data: "channel",
|
||||
Type: xmlquery.ElementNode,
|
||||
}
|
||||
root.FirstChild = channel
|
||||
title := &xmlquery.Node{
|
||||
Data: "title",
|
||||
Type: xmlquery.ElementNode,
|
||||
}
|
||||
title_text := &xmlquery.Node{
|
||||
Data: "W3Schools Home Page",
|
||||
Type: xmlquery.TextNode,
|
||||
}
|
||||
title.FirstChild = title_text
|
||||
channel.FirstChild = title
|
||||
fmt.Println(doc.OutputXML(true))
|
||||
// <?xml version="1.0"?><rss><channel><title>W3Schools Home Page</title></channel></rss>
|
||||
```
|
||||
|
||||
Quick Tutorial
|
||||
===
|
||||
|
||||
```go
|
||||
import (
|
||||
"github.com/antchfx/xmlquery"
|
||||
)
|
||||
|
||||
func main(){
|
||||
s := `<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>W3Schools Home Page</title>
|
||||
<link>https://www.w3schools.com</link>
|
||||
<description>Free web building tutorials</description>
|
||||
<item>
|
||||
<title>RSS Tutorial</title>
|
||||
<link>https://www.w3schools.com/xml/xml_rss.asp</link>
|
||||
<description>New RSS tutorial on W3Schools</description>
|
||||
</item>
|
||||
<item>
|
||||
<title>XML Tutorial</title>
|
||||
<link>https://www.w3schools.com/xml</link>
|
||||
<description>New XML tutorial on W3Schools</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
doc, err := xmlquery.Parse(strings.NewReader(s))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
channel := xmlquery.FindOne(doc, "//channel")
|
||||
if n := channel.SelectElement("title"); n != nil {
|
||||
fmt.Printf("title: %s\n", n.InnerText())
|
||||
}
|
||||
if n := channel.SelectElement("link"); n != nil {
|
||||
fmt.Printf("link: %s\n", n.InnerText())
|
||||
}
|
||||
for i, n := range xmlquery.Find(doc, "//item/title") {
|
||||
fmt.Printf("#%d %s\n", i, n.InnerText())
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
List of supported XPath query packages
|
||||
===
|
||||
|Name |Description |
|
||||
|--------------------------|----------------|
|
||||
|[htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document|
|
||||
|[xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document|
|
||||
|[jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document|
|
||||
|
||||
Questions
|
||||
===
|
||||
Please let me know if you have any questions
|
|
@ -0,0 +1,121 @@
|
|||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" ?>
|
||||
<bookstore specialty="novel">
|
||||
<book id="bk101">
|
||||
<author>Gambardella, Matthew</author>
|
||||
<title>XML Developer's Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>44.95</price>
|
||||
<publish_date>2000-10-01</publish_date>
|
||||
<description>An in-depth look at creating applications
|
||||
with XML.</description>
|
||||
</book>
|
||||
<book id="bk102">
|
||||
<author>Ralls, Kim</author>
|
||||
<title>Midnight Rain</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-12-16</publish_date>
|
||||
<description>A former architect battles corporate zombies,
|
||||
an evil sorceress, and her own childhood to become queen
|
||||
of the world.</description>
|
||||
</book>
|
||||
<book id="bk103">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Maeve Ascendant</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-11-17</publish_date>
|
||||
<description>After the collapse of a nanotechnology
|
||||
society in England, the young survivors lay the
|
||||
foundation for a new society.</description>
|
||||
</book>
|
||||
<book id="bk104">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Oberon's Legacy</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-03-10</publish_date>
|
||||
<description>In post-apocalypse England, the mysterious
|
||||
agent known only as Oberon helps to create a new life
|
||||
for the inhabitants of London. Sequel to Maeve
|
||||
Ascendant.</description>
|
||||
</book>
|
||||
<book id="bk105">
|
||||
<author>Corets, Eva</author>
|
||||
<title>The Sundered Grail</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-09-10</publish_date>
|
||||
<description>The two daughters of Maeve, half-sisters,
|
||||
battle one another for control of England. Sequel to
|
||||
Oberon's Legacy.</description>
|
||||
</book>
|
||||
<book id="bk106">
|
||||
<author>Randall, Cynthia</author>
|
||||
<title>Lover Birds</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-09-02</publish_date>
|
||||
<description>When Carla meets Paul at an ornithology
|
||||
conference, tempers fly as feathers get ruffled.</description>
|
||||
</book>
|
||||
<book id="bk107">
|
||||
<author>Thurman, Paula</author>
|
||||
<title>Splish Splash</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>A deep sea diver finds true love twenty
|
||||
thousand leagues beneath the sea.</description>
|
||||
</book>
|
||||
<book id="bk108">
|
||||
<author>Knorr, Stefan</author>
|
||||
<title>Creepy Crawlies</title>
|
||||
<genre>Horror</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-12-06</publish_date>
|
||||
<description>An anthology of horror stories about roaches,
|
||||
centipedes, scorpions and other insects.</description>
|
||||
</book>
|
||||
<book id="bk109">
|
||||
<author>Kress, Peter</author>
|
||||
<title>Paradox Lost</title>
|
||||
<genre>Science Fiction</genre>
|
||||
<price>6.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>After an inadvertant trip through a Heisenberg
|
||||
Uncertainty Device, James Salway discovers the problems
|
||||
of being quantum.</description>
|
||||
</book>
|
||||
<book id="bk110">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>Microsoft .NET: The Programming Bible</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-09</publish_date>
|
||||
<description>Microsoft's .NET initiative is explored in
|
||||
detail in this deep programmer's reference.</description>
|
||||
</book>
|
||||
<book id="bk111">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>MSXML3: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-01</publish_date>
|
||||
<description>The Microsoft MSXML3 parser is covered in
|
||||
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||
SAX and more.</description>
|
||||
</book>
|
||||
<book id="bk112">
|
||||
<author>Galos, Mike</author>
|
||||
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>49.95</price>
|
||||
<publish_date>2001-04-16</publish_date>
|
||||
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||
integrated into a comprehensive development
|
||||
environment.</description>
|
||||
</book>
|
||||
</bookstore>
|
|
@ -0,0 +1,302 @@
|
|||
package xmlquery
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// A NodeType is the type of a Node.
|
||||
type NodeType uint
|
||||
|
||||
const (
|
||||
// DocumentNode is a document object that, as the root of the document tree,
|
||||
// provides access to the entire XML document.
|
||||
DocumentNode NodeType = iota
|
||||
// DeclarationNode is the document type declaration, indicated by the following
|
||||
// tag (for example, <!DOCTYPE...> ).
|
||||
DeclarationNode
|
||||
// ElementNode is an element (for example, <item> ).
|
||||
ElementNode
|
||||
// TextNode is the text content of a node.
|
||||
TextNode
|
||||
// CommentNode a comment (for example, <!-- my comment --> ).
|
||||
CommentNode
|
||||
// AttributeNode is an attribute of element.
|
||||
AttributeNode
|
||||
)
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for
|
||||
// element nodes, content for text) and are part of a tree of Nodes.
|
||||
type Node struct {
|
||||
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
|
||||
|
||||
Type NodeType
|
||||
Data string
|
||||
Prefix string
|
||||
NamespaceURI string
|
||||
Attr []xml.Attr
|
||||
|
||||
level int // node level in the tree
|
||||
}
|
||||
|
||||
// InnerText returns the text between the start and end tags of the object.
|
||||
func (n *Node) InnerText() string {
|
||||
var output func(*bytes.Buffer, *Node)
|
||||
output = func(buf *bytes.Buffer, n *Node) {
|
||||
switch n.Type {
|
||||
case TextNode:
|
||||
buf.WriteString(n.Data)
|
||||
return
|
||||
case CommentNode:
|
||||
return
|
||||
}
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
output(buf, child)
|
||||
}
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
output(&buf, n)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func outputXML(buf *bytes.Buffer, n *Node) {
|
||||
if n.Type == TextNode {
|
||||
xml.EscapeText(buf, []byte(strings.TrimSpace(n.Data)))
|
||||
return
|
||||
}
|
||||
if n.Type == CommentNode {
|
||||
buf.WriteString("<!--")
|
||||
buf.WriteString(n.Data)
|
||||
buf.WriteString("-->")
|
||||
return
|
||||
}
|
||||
if n.Type == DeclarationNode {
|
||||
buf.WriteString("<?" + n.Data)
|
||||
} else {
|
||||
if n.Prefix == "" {
|
||||
buf.WriteString("<" + n.Data)
|
||||
} else {
|
||||
buf.WriteString("<" + n.Prefix + ":" + n.Data)
|
||||
}
|
||||
}
|
||||
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Name.Space != "" {
|
||||
buf.WriteString(fmt.Sprintf(` %s:%s="%s"`, attr.Name.Space, attr.Name.Local, attr.Value))
|
||||
} else {
|
||||
buf.WriteString(fmt.Sprintf(` %s="%s"`, attr.Name.Local, attr.Value))
|
||||
}
|
||||
}
|
||||
if n.Type == DeclarationNode {
|
||||
buf.WriteString("?>")
|
||||
} else {
|
||||
buf.WriteString(">")
|
||||
}
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
outputXML(buf, child)
|
||||
}
|
||||
if n.Type != DeclarationNode {
|
||||
if n.Prefix == "" {
|
||||
buf.WriteString(fmt.Sprintf("</%s>", n.Data))
|
||||
} else {
|
||||
buf.WriteString(fmt.Sprintf("</%s:%s>", n.Prefix, n.Data))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// OutputXML returns the text that including tags name.
|
||||
func (n *Node) OutputXML(self bool) string {
|
||||
var buf bytes.Buffer
|
||||
if self {
|
||||
outputXML(&buf, n)
|
||||
} else {
|
||||
for n := n.FirstChild; n != nil; n = n.NextSibling {
|
||||
outputXML(&buf, n)
|
||||
}
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func addAttr(n *Node, key, val string) {
|
||||
var attr xml.Attr
|
||||
if i := strings.Index(key, ":"); i > 0 {
|
||||
attr = xml.Attr{
|
||||
Name: xml.Name{Space: key[:i], Local: key[i+1:]},
|
||||
Value: val,
|
||||
}
|
||||
} else {
|
||||
attr = xml.Attr{
|
||||
Name: xml.Name{Local: key},
|
||||
Value: val,
|
||||
}
|
||||
}
|
||||
|
||||
n.Attr = append(n.Attr, attr)
|
||||
}
|
||||
|
||||
func addChild(parent, n *Node) {
|
||||
n.Parent = parent
|
||||
if parent.FirstChild == nil {
|
||||
parent.FirstChild = n
|
||||
} else {
|
||||
parent.LastChild.NextSibling = n
|
||||
n.PrevSibling = parent.LastChild
|
||||
}
|
||||
|
||||
parent.LastChild = n
|
||||
}
|
||||
|
||||
func addSibling(sibling, n *Node) {
|
||||
for t := sibling.NextSibling; t != nil; t = t.NextSibling {
|
||||
sibling = t
|
||||
}
|
||||
n.Parent = sibling.Parent
|
||||
sibling.NextSibling = n
|
||||
n.PrevSibling = sibling
|
||||
if sibling.Parent != nil {
|
||||
sibling.Parent.LastChild = n
|
||||
}
|
||||
}
|
||||
|
||||
// LoadURL loads the XML document from the specified URL.
|
||||
func LoadURL(url string) (*Node, error) {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
return parse(resp.Body)
|
||||
}
|
||||
|
||||
func parse(r io.Reader) (*Node, error) {
|
||||
var (
|
||||
decoder = xml.NewDecoder(r)
|
||||
doc = &Node{Type: DocumentNode}
|
||||
space2prefix = make(map[string]string)
|
||||
level = 0
|
||||
)
|
||||
// http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
|
||||
space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
|
||||
decoder.CharsetReader = charset.NewReaderLabel
|
||||
prev := doc
|
||||
for {
|
||||
tok, err := decoder.Token()
|
||||
switch {
|
||||
case err == io.EOF:
|
||||
goto quit
|
||||
case err != nil:
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch tok := tok.(type) {
|
||||
case xml.StartElement:
|
||||
if level == 0 {
|
||||
// mising XML declaration
|
||||
node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
|
||||
addChild(prev, node)
|
||||
level = 1
|
||||
prev = node
|
||||
}
|
||||
// https://www.w3.org/TR/xml-names/#scoping-defaulting
|
||||
for _, att := range tok.Attr {
|
||||
if att.Name.Local == "xmlns" {
|
||||
space2prefix[att.Value] = ""
|
||||
} else if att.Name.Space == "xmlns" {
|
||||
space2prefix[att.Value] = att.Name.Local
|
||||
}
|
||||
}
|
||||
|
||||
if tok.Name.Space != "" {
|
||||
if _, found := space2prefix[tok.Name.Space]; !found {
|
||||
return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(tok.Attr); i++ {
|
||||
att := &tok.Attr[i]
|
||||
if prefix, ok := space2prefix[att.Name.Space]; ok {
|
||||
att.Name.Space = prefix
|
||||
}
|
||||
}
|
||||
|
||||
node := &Node{
|
||||
Type: ElementNode,
|
||||
Data: tok.Name.Local,
|
||||
Prefix: space2prefix[tok.Name.Space],
|
||||
NamespaceURI: tok.Name.Space,
|
||||
Attr: tok.Attr,
|
||||
level: level,
|
||||
}
|
||||
//fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, level))
|
||||
if level == prev.level {
|
||||
addSibling(prev, node)
|
||||
} else if level > prev.level {
|
||||
addChild(prev, node)
|
||||
} else if level < prev.level {
|
||||
for i := prev.level - level; i > 1; i-- {
|
||||
prev = prev.Parent
|
||||
}
|
||||
addSibling(prev.Parent, node)
|
||||
}
|
||||
prev = node
|
||||
level++
|
||||
case xml.EndElement:
|
||||
level--
|
||||
case xml.CharData:
|
||||
node := &Node{Type: TextNode, Data: string(tok), level: level}
|
||||
if level == prev.level {
|
||||
addSibling(prev, node)
|
||||
} else if level > prev.level {
|
||||
addChild(prev, node)
|
||||
}
|
||||
case xml.Comment:
|
||||
node := &Node{Type: CommentNode, Data: string(tok), level: level}
|
||||
if level == prev.level {
|
||||
addSibling(prev, node)
|
||||
} else if level > prev.level {
|
||||
addChild(prev, node)
|
||||
} else if level < prev.level {
|
||||
for i := prev.level - level; i > 1; i-- {
|
||||
prev = prev.Parent
|
||||
}
|
||||
addSibling(prev.Parent, node)
|
||||
}
|
||||
case xml.ProcInst: // Processing Instruction
|
||||
if prev.Type != DeclarationNode {
|
||||
level++
|
||||
}
|
||||
node := &Node{Type: DeclarationNode, Data: tok.Target, level: level}
|
||||
pairs := strings.Split(string(tok.Inst), " ")
|
||||
for _, pair := range pairs {
|
||||
pair = strings.TrimSpace(pair)
|
||||
if i := strings.Index(pair, "="); i > 0 {
|
||||
addAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`))
|
||||
}
|
||||
}
|
||||
if level == prev.level {
|
||||
addSibling(prev, node)
|
||||
} else if level > prev.level {
|
||||
addChild(prev, node)
|
||||
}
|
||||
prev = node
|
||||
case xml.Directive:
|
||||
}
|
||||
|
||||
}
|
||||
quit:
|
||||
return doc, nil
|
||||
}
|
||||
|
||||
// Parse returns the parse tree for the XML from the given Reader.
|
||||
func Parse(r io.Reader) (*Node, error) {
|
||||
return parse(r)
|
||||
}
|
|
@ -0,0 +1,264 @@
|
|||
/*
|
||||
Package xmlquery provides extract data from XML documents using XPath expression.
|
||||
*/
|
||||
package xmlquery
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
)
|
||||
|
||||
// SelectElements finds child elements with the specified name.
|
||||
func (n *Node) SelectElements(name string) []*Node {
|
||||
return Find(n, name)
|
||||
}
|
||||
|
||||
// SelectElement finds child elements with the specified name.
|
||||
func (n *Node) SelectElement(name string) *Node {
|
||||
return FindOne(n, name)
|
||||
}
|
||||
|
||||
// SelectAttr returns the attribute value with the specified name.
|
||||
func (n *Node) SelectAttr(name string) string {
|
||||
if n.Type == AttributeNode {
|
||||
if n.Data == name {
|
||||
return n.InnerText()
|
||||
}
|
||||
return ""
|
||||
}
|
||||
var local, space string
|
||||
local = name
|
||||
if i := strings.Index(name, ":"); i > 0 {
|
||||
space = name[:i]
|
||||
local = name[i+1:]
|
||||
}
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Name.Local == local && attr.Name.Space == space {
|
||||
return attr.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var _ xpath.NodeNavigator = &NodeNavigator{}
|
||||
|
||||
// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node.
|
||||
func CreateXPathNavigator(top *Node) *NodeNavigator {
|
||||
return &NodeNavigator{curr: top, root: top, attr: -1}
|
||||
}
|
||||
|
||||
func getCurrentNode(it *xpath.NodeIterator) *Node {
|
||||
n := it.Current().(*NodeNavigator)
|
||||
if n.NodeType() == xpath.AttributeNode {
|
||||
childNode := &Node{
|
||||
Type: TextNode,
|
||||
Data: n.Value(),
|
||||
}
|
||||
return &Node{
|
||||
Type: AttributeNode,
|
||||
Data: n.LocalName(),
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
}
|
||||
}
|
||||
return n.curr
|
||||
}
|
||||
|
||||
// Find searches the Node that matches by the specified XPath expr.
|
||||
func Find(top *Node, expr string) []*Node {
|
||||
exp, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
t := exp.Select(CreateXPathNavigator(top))
|
||||
var elems []*Node
|
||||
for t.MoveNext() {
|
||||
elems = append(elems, getCurrentNode(t))
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
||||
// FindOne searches the Node that matches by the specified XPath expr,
|
||||
// and returns first element of matched.
|
||||
func FindOne(top *Node, expr string) *Node {
|
||||
exp, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
t := exp.Select(CreateXPathNavigator(top))
|
||||
var elem *Node
|
||||
if t.MoveNext() {
|
||||
elem = getCurrentNode(t)
|
||||
}
|
||||
return elem
|
||||
}
|
||||
|
||||
// FindEach searches the html.Node and calls functions cb.
|
||||
// Important: this method has deprecated, recommend use for .. = range Find(){}.
|
||||
func FindEach(top *Node, expr string, cb func(int, *Node)) {
|
||||
for i, n := range Find(top, expr) {
|
||||
cb(i, n)
|
||||
}
|
||||
}
|
||||
|
||||
// FindEachWithBreak functions the same as FindEach but allows you
|
||||
// to break the loop by returning false from your callback function, cb.
|
||||
// Important: this method has deprecated, recommend use for .. = range Find(){}.
|
||||
func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) {
|
||||
for i, n := range Find(top, expr) {
|
||||
if !cb(i, n) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type NodeNavigator struct {
|
||||
root, curr *Node
|
||||
attr int
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Current() *Node {
|
||||
return x.curr
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) NodeType() xpath.NodeType {
|
||||
switch x.curr.Type {
|
||||
case CommentNode:
|
||||
return xpath.CommentNode
|
||||
case TextNode:
|
||||
return xpath.TextNode
|
||||
case DeclarationNode, DocumentNode:
|
||||
return xpath.RootNode
|
||||
case ElementNode:
|
||||
if x.attr != -1 {
|
||||
return xpath.AttributeNode
|
||||
}
|
||||
return xpath.ElementNode
|
||||
}
|
||||
panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type))
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) LocalName() string {
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].Name.Local
|
||||
}
|
||||
return x.curr.Data
|
||||
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Prefix() string {
|
||||
if x.NodeType() == xpath.AttributeNode {
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].Name.Space
|
||||
}
|
||||
return ""
|
||||
}
|
||||
return x.curr.Prefix
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Value() string {
|
||||
switch x.curr.Type {
|
||||
case CommentNode:
|
||||
return x.curr.Data
|
||||
case ElementNode:
|
||||
if x.attr != -1 {
|
||||
return x.curr.Attr[x.attr].Value
|
||||
}
|
||||
return x.curr.InnerText()
|
||||
case TextNode:
|
||||
return x.curr.Data
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) Copy() xpath.NodeNavigator {
|
||||
n := *x
|
||||
return &n
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToRoot() {
|
||||
x.curr = x.root
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToParent() bool {
|
||||
if x.attr != -1 {
|
||||
x.attr = -1
|
||||
return true
|
||||
} else if node := x.curr.Parent; node != nil {
|
||||
x.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToNextAttribute() bool {
|
||||
if x.attr >= len(x.curr.Attr)-1 {
|
||||
return false
|
||||
}
|
||||
x.attr++
|
||||
return true
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToChild() bool {
|
||||
if x.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := x.curr.FirstChild; node != nil {
|
||||
x.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToFirst() bool {
|
||||
if x.attr != -1 || x.curr.PrevSibling == nil {
|
||||
return false
|
||||
}
|
||||
for {
|
||||
node := x.curr.PrevSibling
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
x.curr = node
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) String() string {
|
||||
return x.Value()
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToNext() bool {
|
||||
if x.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := x.curr.NextSibling; node != nil {
|
||||
x.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveToPrevious() bool {
|
||||
if x.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := x.curr.PrevSibling; node != nil {
|
||||
x.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool {
|
||||
node, ok := other.(*NodeNavigator)
|
||||
if !ok || node.root != x.root {
|
||||
return false
|
||||
}
|
||||
|
||||
x.curr = node.curr
|
||||
x.attr = node.attr
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
language: go
|
||||
go: 1.x
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Fatih Arslan
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -0,0 +1,58 @@
|
|||
# CamelCase [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/camelcase) [![Build Status](http://img.shields.io/travis/fatih/camelcase.svg?style=flat-square)](https://travis-ci.org/fatih/camelcase)
|
||||
|
||||
CamelCase is a Golang (Go) package to split the words of a camelcase type
|
||||
string into a slice of words. It can be used to convert a camelcase word (lower
|
||||
or upper case) into any type of word.
|
||||
|
||||
## Splitting rules:
|
||||
|
||||
1. If string is not valid UTF-8, return it without splitting as
|
||||
single item array.
|
||||
2. Assign all unicode characters into one of 4 sets: lower case
|
||||
letters, upper case letters, numbers, and all other characters.
|
||||
3. Iterate through characters of string, introducing splits
|
||||
between adjacent characters that belong to different sets.
|
||||
4. Iterate through array of split strings, and if a given string
|
||||
is upper case:
|
||||
* if subsequent string is lower case:
|
||||
* move last character of upper case string to beginning of
|
||||
lower case string
|
||||
|
||||
## Install
|
||||
|
||||
```bash
|
||||
go get github.com/fatih/camelcase
|
||||
```
|
||||
|
||||
## Usage and examples
|
||||
|
||||
```go
|
||||
splitted := camelcase.Split("GolangPackage")
|
||||
|
||||
fmt.Println(splitted[0], splitted[1]) // prints: "Golang", "Package"
|
||||
```
|
||||
|
||||
Both lower camel case and upper camel case are supported. For more info please
|
||||
check: [http://en.wikipedia.org/wiki/CamelCase](http://en.wikipedia.org/wiki/CamelCase)
|
||||
|
||||
Below are some example cases:
|
||||
|
||||
```
|
||||
"" => []
|
||||
"lowercase" => ["lowercase"]
|
||||
"Class" => ["Class"]
|
||||
"MyClass" => ["My", "Class"]
|
||||
"MyC" => ["My", "C"]
|
||||
"HTML" => ["HTML"]
|
||||
"PDFLoader" => ["PDF", "Loader"]
|
||||
"AString" => ["A", "String"]
|
||||
"SimpleXMLParser" => ["Simple", "XML", "Parser"]
|
||||
"vimRPCPlugin" => ["vim", "RPC", "Plugin"]
|
||||
"GL11Version" => ["GL", "11", "Version"]
|
||||
"99Bottles" => ["99", "Bottles"]
|
||||
"May5" => ["May", "5"]
|
||||
"BFG9000" => ["BFG", "9000"]
|
||||
"BöseÜberraschung" => ["Böse", "Überraschung"]
|
||||
"Two spaces" => ["Two", " ", "spaces"]
|
||||
"BadUTF8\xe2\xe2\xa1" => ["BadUTF8\xe2\xe2\xa1"]
|
||||
```
|
|
@ -0,0 +1,90 @@
|
|||
// Package camelcase is a micro package to split the words of a camelcase type
|
||||
// string into a slice of words.
|
||||
package camelcase
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Split splits the camelcase word and returns a list of words. It also
|
||||
// supports digits. Both lower camel case and upper camel case are supported.
|
||||
// For more info please check: http://en.wikipedia.org/wiki/CamelCase
|
||||
//
|
||||
// Examples
|
||||
//
|
||||
// "" => [""]
|
||||
// "lowercase" => ["lowercase"]
|
||||
// "Class" => ["Class"]
|
||||
// "MyClass" => ["My", "Class"]
|
||||
// "MyC" => ["My", "C"]
|
||||
// "HTML" => ["HTML"]
|
||||
// "PDFLoader" => ["PDF", "Loader"]
|
||||
// "AString" => ["A", "String"]
|
||||
// "SimpleXMLParser" => ["Simple", "XML", "Parser"]
|
||||
// "vimRPCPlugin" => ["vim", "RPC", "Plugin"]
|
||||
// "GL11Version" => ["GL", "11", "Version"]
|
||||
// "99Bottles" => ["99", "Bottles"]
|
||||
// "May5" => ["May", "5"]
|
||||
// "BFG9000" => ["BFG", "9000"]
|
||||
// "BöseÜberraschung" => ["Böse", "Überraschung"]
|
||||
// "Two spaces" => ["Two", " ", "spaces"]
|
||||
// "BadUTF8\xe2\xe2\xa1" => ["BadUTF8\xe2\xe2\xa1"]
|
||||
//
|
||||
// Splitting rules
|
||||
//
|
||||
// 1) If string is not valid UTF-8, return it without splitting as
|
||||
// single item array.
|
||||
// 2) Assign all unicode characters into one of 4 sets: lower case
|
||||
// letters, upper case letters, numbers, and all other characters.
|
||||
// 3) Iterate through characters of string, introducing splits
|
||||
// between adjacent characters that belong to different sets.
|
||||
// 4) Iterate through array of split strings, and if a given string
|
||||
// is upper case:
|
||||
// if subsequent string is lower case:
|
||||
// move last character of upper case string to beginning of
|
||||
// lower case string
|
||||
func Split(src string) (entries []string) {
|
||||
// don't split invalid utf8
|
||||
if !utf8.ValidString(src) {
|
||||
return []string{src}
|
||||
}
|
||||
entries = []string{}
|
||||
var runes [][]rune
|
||||
lastClass := 0
|
||||
class := 0
|
||||
// split into fields based on class of unicode character
|
||||
for _, r := range src {
|
||||
switch true {
|
||||
case unicode.IsLower(r):
|
||||
class = 1
|
||||
case unicode.IsUpper(r):
|
||||
class = 2
|
||||
case unicode.IsDigit(r):
|
||||
class = 3
|
||||
default:
|
||||
class = 4
|
||||
}
|
||||
if class == lastClass {
|
||||
runes[len(runes)-1] = append(runes[len(runes)-1], r)
|
||||
} else {
|
||||
runes = append(runes, []rune{r})
|
||||
}
|
||||
lastClass = class
|
||||
}
|
||||
// handle upper case -> lower case sequences, e.g.
|
||||
// "PDFL", "oader" -> "PDF", "Loader"
|
||||
for i := 0; i < len(runes)-1; i++ {
|
||||
if unicode.IsUpper(runes[i][0]) && unicode.IsLower(runes[i+1][0]) {
|
||||
runes[i+1] = append([]rune{runes[i][len(runes[i])-1]}, runes[i+1]...)
|
||||
runes[i] = runes[i][:len(runes[i])-1]
|
||||
}
|
||||
}
|
||||
// construct []string from results
|
||||
for _, s := range runes {
|
||||
if len(s) > 0 {
|
||||
entries = append(entries, string(s))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
language: go
|
||||
go:
|
||||
- 1.7.x
|
||||
- tip
|
|
@ -0,0 +1,60 @@
|
|||
Copyright (c) 2017, Fatih Arslan
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of structtag nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This software includes some portions from Go. Go is used under the terms of the
|
||||
BSD like license.
|
||||
|
||||
Copyright (c) 2012 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
The Go gopher was designed by Renee French. http://reneefrench.blogspot.com/ The design is licensed under the Creative Commons 3.0 Attributions license. Read this article for more details: https://blog.golang.org/gopher
|
|
@ -0,0 +1,73 @@
|
|||
# structtag [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/structtag) [![Build Status](https://travis-ci.org/fatih/structtag.svg?branch=master)](https://travis-ci.org/fatih/structtag)
|
||||
|
||||
structtag provides an easy way of parsing and manipulating struct tag fields.
|
||||
Please vendor the library as it might change in future versions.
|
||||
|
||||
# Install
|
||||
|
||||
```bash
|
||||
go get github.com/fatih/structtag
|
||||
```
|
||||
|
||||
# Example
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/fatih/structtag"
|
||||
)
|
||||
|
||||
func main() {
|
||||
type t struct {
|
||||
t string `json:"foo,omitempty,string" xml:"foo"`
|
||||
}
|
||||
|
||||
// get field tag
|
||||
tag := reflect.TypeOf(t{}).Field(0).Tag
|
||||
|
||||
// ... and start using structtag by parsing the tag
|
||||
tags, err := structtag.Parse(string(tag))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// iterate over all tags
|
||||
for _, t := range tags.Tags() {
|
||||
fmt.Printf("tag: %+v\n", t)
|
||||
}
|
||||
|
||||
// get a single tag
|
||||
jsonTag, err := tags.Get("json")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println(jsonTag) // Output: json:"foo,omitempty,string"
|
||||
fmt.Println(jsonTag.Key) // Output: json
|
||||
fmt.Println(jsonTag.Name) // Output: foo
|
||||
fmt.Println(jsonTag.Options) // Output: [omitempty string]
|
||||
|
||||
// change existing tag
|
||||
jsonTag.Name = "foo_bar"
|
||||
jsonTag.Options = nil
|
||||
tags.Set(jsonTag)
|
||||
|
||||
// add new tag
|
||||
tags.Set(&structtag.Tag{
|
||||
Key: "hcl",
|
||||
Name: "foo",
|
||||
Options: []string{"squash"},
|
||||
})
|
||||
|
||||
// print the tags
|
||||
fmt.Println(tags) // Output: json:"foo_bar" xml:"foo" hcl:"foo,squash"
|
||||
|
||||
// sort tags according to keys
|
||||
sort.Sort(tags)
|
||||
fmt.Println(tags) // Output: hcl:"foo,squash" json:"foo_bar" xml:"foo"
|
||||
}
|
||||
```
|
|
@ -0,0 +1,309 @@
|
|||
package structtag
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
errTagSyntax = errors.New("bad syntax for struct tag pair")
|
||||
errTagKeySyntax = errors.New("bad syntax for struct tag key")
|
||||
errTagValueSyntax = errors.New("bad syntax for struct tag value")
|
||||
|
||||
errKeyNotSet = errors.New("tag key does not exist")
|
||||
errTagNotExist = errors.New("tag does not exist")
|
||||
errTagKeyMismatch = errors.New("mismatch between key and tag.key")
|
||||
)
|
||||
|
||||
// Tags represent a set of tags from a single struct field
|
||||
type Tags struct {
|
||||
tags []*Tag
|
||||
}
|
||||
|
||||
// Tag defines a single struct's string literal tag
|
||||
type Tag struct {
|
||||
// Key is the tag key, such as json, xml, etc..
|
||||
// i.e: `json:"foo,omitempty". Here key is: "json"
|
||||
Key string
|
||||
|
||||
// Name is a part of the value
|
||||
// i.e: `json:"foo,omitempty". Here name is: "foo"
|
||||
Name string
|
||||
|
||||
// Options is a part of the value. It contains a slice of tag options i.e:
|
||||
// `json:"foo,omitempty". Here options is: ["omitempty"]
|
||||
Options []string
|
||||
}
|
||||
|
||||
// Parse parses a single struct field tag and returns the set of tags.
|
||||
func Parse(tag string) (*Tags, error) {
|
||||
var tags []*Tag
|
||||
|
||||
// NOTE(arslan) following code is from reflect and vet package with some
|
||||
// modifications to collect all necessary information and extend it with
|
||||
// usable methods
|
||||
for tag != "" {
|
||||
// Skip leading space.
|
||||
i := 0
|
||||
for i < len(tag) && tag[i] == ' ' {
|
||||
i++
|
||||
}
|
||||
tag = tag[i:]
|
||||
if tag == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Scan to colon. A space, a quote or a control character is a syntax
|
||||
// error. Strictly speaking, control chars include the range [0x7f,
|
||||
// 0x9f], not just [0x00, 0x1f], but in practice, we ignore the
|
||||
// multi-byte control characters as it is simpler to inspect the tag's
|
||||
// bytes than the tag's runes.
|
||||
i = 0
|
||||
for i < len(tag) && tag[i] > ' ' && tag[i] != ':' && tag[i] != '"' && tag[i] != 0x7f {
|
||||
i++
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
return nil, errTagKeySyntax
|
||||
}
|
||||
if i+1 >= len(tag) || tag[i] != ':' {
|
||||
return nil, errTagSyntax
|
||||
}
|
||||
if tag[i+1] != '"' {
|
||||
return nil, errTagValueSyntax
|
||||
}
|
||||
|
||||
key := string(tag[:i])
|
||||
tag = tag[i+1:]
|
||||
|
||||
// Scan quoted string to find value.
|
||||
i = 1
|
||||
for i < len(tag) && tag[i] != '"' {
|
||||
if tag[i] == '\\' {
|
||||
i++
|
||||
}
|
||||
i++
|
||||
}
|
||||
if i >= len(tag) {
|
||||
return nil, errTagValueSyntax
|
||||
}
|
||||
|
||||
qvalue := string(tag[:i+1])
|
||||
tag = tag[i+1:]
|
||||
|
||||
value, err := strconv.Unquote(qvalue)
|
||||
if err != nil {
|
||||
return nil, errTagValueSyntax
|
||||
}
|
||||
|
||||
res := strings.Split(value, ",")
|
||||
name := res[0]
|
||||
options := res[1:]
|
||||
if len(options) == 0 {
|
||||
options = nil
|
||||
}
|
||||
|
||||
tags = append(tags, &Tag{
|
||||
Key: key,
|
||||
Name: name,
|
||||
Options: options,
|
||||
})
|
||||
}
|
||||
|
||||
return &Tags{
|
||||
tags: tags,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Get returns the tag associated with the given key. If the key is present
|
||||
// in the tag the value (which may be empty) is returned. Otherwise the
|
||||
// returned value will be the empty string. The ok return value reports whether
|
||||
// the tag exists or not (which the return value is nil).
|
||||
func (t *Tags) Get(key string) (*Tag, error) {
|
||||
for _, tag := range t.tags {
|
||||
if tag.Key == key {
|
||||
return tag, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, errTagNotExist
|
||||
}
|
||||
|
||||
// Set sets the given tag. If the tag key already exists it'll override it
|
||||
func (t *Tags) Set(tag *Tag) error {
|
||||
if tag.Key == "" {
|
||||
return errKeyNotSet
|
||||
}
|
||||
|
||||
added := false
|
||||
for i, tg := range t.tags {
|
||||
if tg.Key == tag.Key {
|
||||
added = true
|
||||
t.tags[i] = tag
|
||||
}
|
||||
}
|
||||
|
||||
if !added {
|
||||
// this means this is a new tag, add it
|
||||
t.tags = append(t.tags, tag)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddOptions adds the given option for the given key. If the option already
|
||||
// exists it doesn't add it again.
|
||||
func (t *Tags) AddOptions(key string, options ...string) {
|
||||
for i, tag := range t.tags {
|
||||
if tag.Key != key {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, opt := range options {
|
||||
if !tag.HasOption(opt) {
|
||||
tag.Options = append(tag.Options, opt)
|
||||
}
|
||||
}
|
||||
|
||||
t.tags[i] = tag
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteOptions deletes the given options for the given key
|
||||
func (t *Tags) DeleteOptions(key string, options ...string) {
|
||||
hasOption := func(option string) bool {
|
||||
for _, opt := range options {
|
||||
if opt == option {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
for i, tag := range t.tags {
|
||||
if tag.Key != key {
|
||||
continue
|
||||
}
|
||||
|
||||
var updated []string
|
||||
for _, opt := range tag.Options {
|
||||
if !hasOption(opt) {
|
||||
updated = append(updated, opt)
|
||||
}
|
||||
}
|
||||
|
||||
tag.Options = updated
|
||||
t.tags[i] = tag
|
||||
}
|
||||
}
|
||||
|
||||
// Delete deletes the tag for the given keys
|
||||
func (t *Tags) Delete(keys ...string) {
|
||||
hasKey := func(key string) bool {
|
||||
for _, k := range keys {
|
||||
if k == key {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
var updated []*Tag
|
||||
for _, tag := range t.tags {
|
||||
if !hasKey(tag.Key) {
|
||||
updated = append(updated, tag)
|
||||
}
|
||||
}
|
||||
|
||||
t.tags = updated
|
||||
}
|
||||
|
||||
// Tags returns a slice of tags. The order is the original tag order unless it
|
||||
// was changed.
|
||||
func (t *Tags) Tags() []*Tag {
|
||||
return t.tags
|
||||
}
|
||||
|
||||
// Tags returns a slice of tags. The order is the original tag order unless it
|
||||
// was changed.
|
||||
func (t *Tags) Keys() []string {
|
||||
var keys []string
|
||||
for _, tag := range t.tags {
|
||||
keys = append(keys, tag.Key)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
// String reassembles the tags into a valid literal tag field representation
|
||||
func (t *Tags) String() string {
|
||||
tags := t.Tags()
|
||||
if len(tags) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
for i, tag := range t.Tags() {
|
||||
buf.WriteString(tag.String())
|
||||
if i != len(tags)-1 {
|
||||
buf.WriteString(" ")
|
||||
}
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// HasOption returns true if the given option is available in options
|
||||
func (t *Tag) HasOption(opt string) bool {
|
||||
for _, tagOpt := range t.Options {
|
||||
if tagOpt == opt {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Value returns the raw value of the tag, i.e. if the tag is
|
||||
// `json:"foo,omitempty", the Value is "foo,omitempty"
|
||||
func (t *Tag) Value() string {
|
||||
options := strings.Join(t.Options, ",")
|
||||
if options != "" {
|
||||
return fmt.Sprintf(`%s,%s`, t.Name, options)
|
||||
}
|
||||
return t.Name
|
||||
}
|
||||
|
||||
// String reassembles the tag into a valid tag field representation
|
||||
func (t *Tag) String() string {
|
||||
return fmt.Sprintf(`%s:"%s"`, t.Key, t.Value())
|
||||
}
|
||||
|
||||
// GoString implements the fmt.GoStringer interface
|
||||
func (t *Tag) GoString() string {
|
||||
template := `{
|
||||
Key: '%s',
|
||||
Name: '%s',
|
||||
Option: '%s',
|
||||
}`
|
||||
|
||||
if t.Options == nil {
|
||||
return fmt.Sprintf(template, t.Key, t.Name, "nil")
|
||||
}
|
||||
|
||||
options := strings.Join(t.Options, ",")
|
||||
return fmt.Sprintf(template, t.Key, t.Name, options)
|
||||
}
|
||||
|
||||
func (t *Tags) Len() int {
|
||||
return len(t.tags)
|
||||
}
|
||||
|
||||
func (t *Tags) Less(i int, j int) bool {
|
||||
return t.tags[i].Key < t.tags[j].Key
|
||||
}
|
||||
|
||||
func (t *Tags) Swap(i int, j int) {
|
||||
t.tags[i], t.tags[j] = t.tags[j], t.tags[i]
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
glob.iml
|
||||
.idea
|
||||
*.cpu
|
||||
*.mem
|
||||
*.test
|
||||
*.dot
|
||||
*.png
|
||||
*.svg
|
|
@ -0,0 +1,9 @@
|
|||
sudo: false
|
||||
|
||||
language: go
|
||||
|
||||
go:
|
||||
- 1.5.3
|
||||
|
||||
script:
|
||||
- go test -v ./...
|
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Sergey Kamardin
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,26 @@
|
|||
#! /bin/bash
|
||||
|
||||
bench() {
|
||||
filename="/tmp/$1-$2.bench"
|
||||
if test -e "${filename}";
|
||||
then
|
||||
echo "Already exists ${filename}"
|
||||
else
|
||||
backup=`git rev-parse --abbrev-ref HEAD`
|
||||
git checkout $1
|
||||
echo -n "Creating ${filename}... "
|
||||
go test ./... -run=NONE -bench=$2 > "${filename}" -benchmem
|
||||
echo "OK"
|
||||
git checkout ${backup}
|
||||
sleep 5
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
to=$1
|
||||
current=`git rev-parse --abbrev-ref HEAD`
|
||||
|
||||
bench ${to} $2
|
||||
bench ${current} $2
|
||||
|
||||
benchcmp $3 "/tmp/${to}-$2.bench" "/tmp/${current}-$2.bench"
|
|
@ -0,0 +1,525 @@
|
|||
package compiler
|
||||
|
||||
// TODO use constructor with all matchers, and to their structs private
|
||||
// TODO glue multiple Text nodes (like after QuoteMeta)
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/gobwas/glob/match"
|
||||
"github.com/gobwas/glob/syntax/ast"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
)
|
||||
|
||||
func optimizeMatcher(matcher match.Matcher) match.Matcher {
|
||||
switch m := matcher.(type) {
|
||||
|
||||
case match.Any:
|
||||
if len(m.Separators) == 0 {
|
||||
return match.NewSuper()
|
||||
}
|
||||
|
||||
case match.AnyOf:
|
||||
if len(m.Matchers) == 1 {
|
||||
return m.Matchers[0]
|
||||
}
|
||||
|
||||
return m
|
||||
|
||||
case match.List:
|
||||
if m.Not == false && len(m.List) == 1 {
|
||||
return match.NewText(string(m.List))
|
||||
}
|
||||
|
||||
return m
|
||||
|
||||
case match.BTree:
|
||||
m.Left = optimizeMatcher(m.Left)
|
||||
m.Right = optimizeMatcher(m.Right)
|
||||
|
||||
r, ok := m.Value.(match.Text)
|
||||
if !ok {
|
||||
return m
|
||||
}
|
||||
|
||||
var (
|
||||
leftNil = m.Left == nil
|
||||
rightNil = m.Right == nil
|
||||
)
|
||||
if leftNil && rightNil {
|
||||
return match.NewText(r.Str)
|
||||
}
|
||||
|
||||
_, leftSuper := m.Left.(match.Super)
|
||||
lp, leftPrefix := m.Left.(match.Prefix)
|
||||
la, leftAny := m.Left.(match.Any)
|
||||
|
||||
_, rightSuper := m.Right.(match.Super)
|
||||
rs, rightSuffix := m.Right.(match.Suffix)
|
||||
ra, rightAny := m.Right.(match.Any)
|
||||
|
||||
switch {
|
||||
case leftSuper && rightSuper:
|
||||
return match.NewContains(r.Str, false)
|
||||
|
||||
case leftSuper && rightNil:
|
||||
return match.NewSuffix(r.Str)
|
||||
|
||||
case rightSuper && leftNil:
|
||||
return match.NewPrefix(r.Str)
|
||||
|
||||
case leftNil && rightSuffix:
|
||||
return match.NewPrefixSuffix(r.Str, rs.Suffix)
|
||||
|
||||
case rightNil && leftPrefix:
|
||||
return match.NewPrefixSuffix(lp.Prefix, r.Str)
|
||||
|
||||
case rightNil && leftAny:
|
||||
return match.NewSuffixAny(r.Str, la.Separators)
|
||||
|
||||
case leftNil && rightAny:
|
||||
return match.NewPrefixAny(r.Str, ra.Separators)
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
return matcher
|
||||
}
|
||||
|
||||
func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
|
||||
if len(matchers) == 0 {
|
||||
return nil, fmt.Errorf("compile error: need at least one matcher")
|
||||
}
|
||||
if len(matchers) == 1 {
|
||||
return matchers[0], nil
|
||||
}
|
||||
if m := glueMatchers(matchers); m != nil {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
idx := -1
|
||||
maxLen := -1
|
||||
var val match.Matcher
|
||||
for i, matcher := range matchers {
|
||||
if l := matcher.Len(); l != -1 && l >= maxLen {
|
||||
maxLen = l
|
||||
idx = i
|
||||
val = matcher
|
||||
}
|
||||
}
|
||||
|
||||
if val == nil { // not found matcher with static length
|
||||
r, err := compileMatchers(matchers[1:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return match.NewBTree(matchers[0], nil, r), nil
|
||||
}
|
||||
|
||||
left := matchers[:idx]
|
||||
var right []match.Matcher
|
||||
if len(matchers) > idx+1 {
|
||||
right = matchers[idx+1:]
|
||||
}
|
||||
|
||||
var l, r match.Matcher
|
||||
var err error
|
||||
if len(left) > 0 {
|
||||
l, err = compileMatchers(left)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if len(right) > 0 {
|
||||
r, err = compileMatchers(right)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return match.NewBTree(val, l, r), nil
|
||||
}
|
||||
|
||||
func glueMatchers(matchers []match.Matcher) match.Matcher {
|
||||
if m := glueMatchersAsEvery(matchers); m != nil {
|
||||
return m
|
||||
}
|
||||
if m := glueMatchersAsRow(matchers); m != nil {
|
||||
return m
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func glueMatchersAsRow(matchers []match.Matcher) match.Matcher {
|
||||
if len(matchers) <= 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
c []match.Matcher
|
||||
l int
|
||||
)
|
||||
for _, matcher := range matchers {
|
||||
if ml := matcher.Len(); ml == -1 {
|
||||
return nil
|
||||
} else {
|
||||
c = append(c, matcher)
|
||||
l += ml
|
||||
}
|
||||
}
|
||||
return match.NewRow(l, c...)
|
||||
}
|
||||
|
||||
func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher {
|
||||
if len(matchers) <= 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
hasAny bool
|
||||
hasSuper bool
|
||||
hasSingle bool
|
||||
min int
|
||||
separator []rune
|
||||
)
|
||||
|
||||
for i, matcher := range matchers {
|
||||
var sep []rune
|
||||
|
||||
switch m := matcher.(type) {
|
||||
case match.Super:
|
||||
sep = []rune{}
|
||||
hasSuper = true
|
||||
|
||||
case match.Any:
|
||||
sep = m.Separators
|
||||
hasAny = true
|
||||
|
||||
case match.Single:
|
||||
sep = m.Separators
|
||||
hasSingle = true
|
||||
min++
|
||||
|
||||
case match.List:
|
||||
if !m.Not {
|
||||
return nil
|
||||
}
|
||||
sep = m.List
|
||||
hasSingle = true
|
||||
min++
|
||||
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
|
||||
// initialize
|
||||
if i == 0 {
|
||||
separator = sep
|
||||
}
|
||||
|
||||
if runes.Equal(sep, separator) {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if hasSuper && !hasAny && !hasSingle {
|
||||
return match.NewSuper()
|
||||
}
|
||||
|
||||
if hasAny && !hasSuper && !hasSingle {
|
||||
return match.NewAny(separator)
|
||||
}
|
||||
|
||||
if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
|
||||
return match.NewMin(min)
|
||||
}
|
||||
|
||||
every := match.NewEveryOf()
|
||||
|
||||
if min > 0 {
|
||||
every.Add(match.NewMin(min))
|
||||
|
||||
if !hasAny && !hasSuper {
|
||||
every.Add(match.NewMax(min))
|
||||
}
|
||||
}
|
||||
|
||||
if len(separator) > 0 {
|
||||
every.Add(match.NewContains(string(separator), true))
|
||||
}
|
||||
|
||||
return every
|
||||
}
|
||||
|
||||
func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
|
||||
var done match.Matcher
|
||||
var left, right, count int
|
||||
|
||||
for l := 0; l < len(matchers); l++ {
|
||||
for r := len(matchers); r > l; r-- {
|
||||
if glued := glueMatchers(matchers[l:r]); glued != nil {
|
||||
var swap bool
|
||||
|
||||
if done == nil {
|
||||
swap = true
|
||||
} else {
|
||||
cl, gl := done.Len(), glued.Len()
|
||||
swap = cl > -1 && gl > -1 && gl > cl
|
||||
swap = swap || count < r-l
|
||||
}
|
||||
|
||||
if swap {
|
||||
done = glued
|
||||
left = l
|
||||
right = r
|
||||
count = r - l
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if done == nil {
|
||||
return matchers
|
||||
}
|
||||
|
||||
next := append(append([]match.Matcher{}, matchers[:left]...), done)
|
||||
if right < len(matchers) {
|
||||
next = append(next, matchers[right:]...)
|
||||
}
|
||||
|
||||
if len(next) == len(matchers) {
|
||||
return next
|
||||
}
|
||||
|
||||
return minimizeMatchers(next)
|
||||
}
|
||||
|
||||
// minimizeAnyOf tries to apply some heuristics to minimize number of nodes in given tree
|
||||
func minimizeTree(tree *ast.Node) *ast.Node {
|
||||
switch tree.Kind {
|
||||
case ast.KindAnyOf:
|
||||
return minimizeTreeAnyOf(tree)
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// minimizeAnyOf tries to find common children of given node of AnyOf pattern
|
||||
// it searches for common children from left and from right
|
||||
// if any common children are found – then it returns new optimized ast tree
|
||||
// else it returns nil
|
||||
func minimizeTreeAnyOf(tree *ast.Node) *ast.Node {
|
||||
if !areOfSameKind(tree.Children, ast.KindPattern) {
|
||||
return nil
|
||||
}
|
||||
|
||||
commonLeft, commonRight := commonChildren(tree.Children)
|
||||
commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
|
||||
if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
|
||||
return nil
|
||||
}
|
||||
|
||||
var result []*ast.Node
|
||||
if commonLeftCount > 0 {
|
||||
result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...))
|
||||
}
|
||||
|
||||
var anyOf []*ast.Node
|
||||
for _, child := range tree.Children {
|
||||
reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
|
||||
var node *ast.Node
|
||||
if len(reuse) == 0 {
|
||||
// this pattern is completely reduced by commonLeft and commonRight patterns
|
||||
// so it become nothing
|
||||
node = ast.NewNode(ast.KindNothing, nil)
|
||||
} else {
|
||||
node = ast.NewNode(ast.KindPattern, nil, reuse...)
|
||||
}
|
||||
anyOf = appendIfUnique(anyOf, node)
|
||||
}
|
||||
switch {
|
||||
case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing:
|
||||
result = append(result, anyOf[0])
|
||||
case len(anyOf) > 1:
|
||||
result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...))
|
||||
}
|
||||
|
||||
if commonRightCount > 0 {
|
||||
result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...))
|
||||
}
|
||||
|
||||
return ast.NewNode(ast.KindPattern, nil, result...)
|
||||
}
|
||||
|
||||
func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) {
|
||||
if len(nodes) <= 1 {
|
||||
return
|
||||
}
|
||||
|
||||
// find node that has least number of children
|
||||
idx := leastChildren(nodes)
|
||||
if idx == -1 {
|
||||
return
|
||||
}
|
||||
tree := nodes[idx]
|
||||
treeLength := len(tree.Children)
|
||||
|
||||
// allocate max able size for rightCommon slice
|
||||
// to get ability insert elements in reverse order (from end to start)
|
||||
// without sorting
|
||||
commonRight = make([]*ast.Node, treeLength)
|
||||
lastRight := treeLength // will use this to get results as commonRight[lastRight:]
|
||||
|
||||
var (
|
||||
breakLeft bool
|
||||
breakRight bool
|
||||
commonTotal int
|
||||
)
|
||||
for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
|
||||
treeLeft := tree.Children[i]
|
||||
treeRight := tree.Children[j]
|
||||
|
||||
for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
|
||||
// skip least children node
|
||||
if k == idx {
|
||||
continue
|
||||
}
|
||||
|
||||
restLeft := nodes[k].Children[i]
|
||||
restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
|
||||
|
||||
breakLeft = breakLeft || !treeLeft.Equal(restLeft)
|
||||
|
||||
// disable searching for right common parts, if left part is already overlapping
|
||||
breakRight = breakRight || (!breakLeft && j <= i)
|
||||
breakRight = breakRight || !treeRight.Equal(restRight)
|
||||
}
|
||||
|
||||
if !breakLeft {
|
||||
commonTotal++
|
||||
commonLeft = append(commonLeft, treeLeft)
|
||||
}
|
||||
if !breakRight {
|
||||
commonTotal++
|
||||
lastRight = j
|
||||
commonRight[j] = treeRight
|
||||
}
|
||||
}
|
||||
|
||||
commonRight = commonRight[lastRight:]
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node {
|
||||
for _, n := range target {
|
||||
if reflect.DeepEqual(n, val) {
|
||||
return target
|
||||
}
|
||||
}
|
||||
return append(target, val)
|
||||
}
|
||||
|
||||
func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool {
|
||||
for _, n := range nodes {
|
||||
if n.Kind != kind {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func leastChildren(nodes []*ast.Node) int {
|
||||
min := -1
|
||||
idx := -1
|
||||
for i, n := range nodes {
|
||||
if idx == -1 || (len(n.Children) < min) {
|
||||
min = len(n.Children)
|
||||
idx = i
|
||||
}
|
||||
}
|
||||
return idx
|
||||
}
|
||||
|
||||
func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) {
|
||||
var matchers []match.Matcher
|
||||
for _, desc := range tree.Children {
|
||||
m, err := compile(desc, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
matchers = append(matchers, optimizeMatcher(m))
|
||||
}
|
||||
return matchers, nil
|
||||
}
|
||||
|
||||
func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
|
||||
switch tree.Kind {
|
||||
case ast.KindAnyOf:
|
||||
// todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
|
||||
if n := minimizeTree(tree); n != nil {
|
||||
return compile(n, sep)
|
||||
}
|
||||
matchers, err := compileTreeChildren(tree, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return match.NewAnyOf(matchers...), nil
|
||||
|
||||
case ast.KindPattern:
|
||||
if len(tree.Children) == 0 {
|
||||
return match.NewNothing(), nil
|
||||
}
|
||||
matchers, err := compileTreeChildren(tree, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m, err = compileMatchers(minimizeMatchers(matchers))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
case ast.KindAny:
|
||||
m = match.NewAny(sep)
|
||||
|
||||
case ast.KindSuper:
|
||||
m = match.NewSuper()
|
||||
|
||||
case ast.KindSingle:
|
||||
m = match.NewSingle(sep)
|
||||
|
||||
case ast.KindNothing:
|
||||
m = match.NewNothing()
|
||||
|
||||
case ast.KindList:
|
||||
l := tree.Value.(ast.List)
|
||||
m = match.NewList([]rune(l.Chars), l.Not)
|
||||
|
||||
case ast.KindRange:
|
||||
r := tree.Value.(ast.Range)
|
||||
m = match.NewRange(r.Lo, r.Hi, r.Not)
|
||||
|
||||
case ast.KindText:
|
||||
t := tree.Value.(ast.Text)
|
||||
m = match.NewText(t.Text)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("could not compile tree: unknown node type")
|
||||
}
|
||||
|
||||
return optimizeMatcher(m), nil
|
||||
}
|
||||
|
||||
func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
|
||||
m, err := compile(tree, sep)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
package glob
|
||||
|
||||
import (
|
||||
"github.com/gobwas/glob/compiler"
|
||||
"github.com/gobwas/glob/syntax"
|
||||
)
|
||||
|
||||
// Glob represents compiled glob pattern.
|
||||
type Glob interface {
|
||||
Match(string) bool
|
||||
}
|
||||
|
||||
// Compile creates Glob for given pattern and strings (if any present after pattern) as separators.
|
||||
// The pattern syntax is:
|
||||
//
|
||||
// pattern:
|
||||
// { term }
|
||||
//
|
||||
// term:
|
||||
// `*` matches any sequence of non-separator characters
|
||||
// `**` matches any sequence of characters
|
||||
// `?` matches any single non-separator character
|
||||
// `[` [ `!` ] { character-range } `]`
|
||||
// character class (must be non-empty)
|
||||
// `{` pattern-list `}`
|
||||
// pattern alternatives
|
||||
// c matches character c (c != `*`, `**`, `?`, `\`, `[`, `{`, `}`)
|
||||
// `\` c matches character c
|
||||
//
|
||||
// character-range:
|
||||
// c matches character c (c != `\\`, `-`, `]`)
|
||||
// `\` c matches character c
|
||||
// lo `-` hi matches character c for lo <= c <= hi
|
||||
//
|
||||
// pattern-list:
|
||||
// pattern { `,` pattern }
|
||||
// comma-separated (without spaces) patterns
|
||||
//
|
||||
func Compile(pattern string, separators ...rune) (Glob, error) {
|
||||
ast, err := syntax.Parse(pattern)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
matcher, err := compiler.Compile(ast, separators)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return matcher, nil
|
||||
}
|
||||
|
||||
// MustCompile is the same as Compile, except that if Compile returns error, this will panic
|
||||
func MustCompile(pattern string, separators ...rune) Glob {
|
||||
g, err := Compile(pattern, separators...)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return g
|
||||
}
|
||||
|
||||
// QuoteMeta returns a string that quotes all glob pattern meta characters
|
||||
// inside the argument text; For example, QuoteMeta(`{foo*}`) returns `\[foo\*\]`.
|
||||
func QuoteMeta(s string) string {
|
||||
b := make([]byte, 2*len(s))
|
||||
|
||||
// a byte loop is correct because all meta characters are ASCII
|
||||
j := 0
|
||||
for i := 0; i < len(s); i++ {
|
||||
if syntax.Special(s[i]) {
|
||||
b[j] = '\\'
|
||||
j++
|
||||
}
|
||||
b[j] = s[i]
|
||||
j++
|
||||
}
|
||||
|
||||
return string(b[0:j])
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/strings"
|
||||
)
|
||||
|
||||
type Any struct {
|
||||
Separators []rune
|
||||
}
|
||||
|
||||
func NewAny(s []rune) Any {
|
||||
return Any{s}
|
||||
}
|
||||
|
||||
func (self Any) Match(s string) bool {
|
||||
return strings.IndexAnyRunes(s, self.Separators) == -1
|
||||
}
|
||||
|
||||
func (self Any) Index(s string) (int, []int) {
|
||||
found := strings.IndexAnyRunes(s, self.Separators)
|
||||
switch found {
|
||||
case -1:
|
||||
case 0:
|
||||
return 0, segments0
|
||||
default:
|
||||
s = s[:found]
|
||||
}
|
||||
|
||||
segments := acquireSegments(len(s))
|
||||
for i := range s {
|
||||
segments = append(segments, i)
|
||||
}
|
||||
segments = append(segments, len(s))
|
||||
|
||||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Any) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Any) String() string {
|
||||
return fmt.Sprintf("<any:![%s]>", string(self.Separators))
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
package match
|
||||
|
||||
import "fmt"
|
||||
|
||||
type AnyOf struct {
|
||||
Matchers Matchers
|
||||
}
|
||||
|
||||
func NewAnyOf(m ...Matcher) AnyOf {
|
||||
return AnyOf{Matchers(m)}
|
||||
}
|
||||
|
||||
func (self *AnyOf) Add(m Matcher) error {
|
||||
self.Matchers = append(self.Matchers, m)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (self AnyOf) Match(s string) bool {
|
||||
for _, m := range self.Matchers {
|
||||
if m.Match(s) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (self AnyOf) Index(s string) (int, []int) {
|
||||
index := -1
|
||||
|
||||
segments := acquireSegments(len(s))
|
||||
for _, m := range self.Matchers {
|
||||
idx, seg := m.Index(s)
|
||||
if idx == -1 {
|
||||
continue
|
||||
}
|
||||
|
||||
if index == -1 || idx < index {
|
||||
index = idx
|
||||
segments = append(segments[:0], seg...)
|
||||
continue
|
||||
}
|
||||
|
||||
if idx > index {
|
||||
continue
|
||||
}
|
||||
|
||||
// here idx == index
|
||||
segments = appendMerge(segments, seg)
|
||||
}
|
||||
|
||||
if index == -1 {
|
||||
releaseSegments(segments)
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return index, segments
|
||||
}
|
||||
|
||||
func (self AnyOf) Len() (l int) {
|
||||
l = -1
|
||||
for _, m := range self.Matchers {
|
||||
ml := m.Len()
|
||||
switch {
|
||||
case l == -1:
|
||||
l = ml
|
||||
continue
|
||||
|
||||
case ml == -1:
|
||||
return -1
|
||||
|
||||
case l != ml:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self AnyOf) String() string {
|
||||
return fmt.Sprintf("<any_of:[%s]>", self.Matchers)
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type BTree struct {
|
||||
Value Matcher
|
||||
Left Matcher
|
||||
Right Matcher
|
||||
ValueLengthRunes int
|
||||
LeftLengthRunes int
|
||||
RightLengthRunes int
|
||||
LengthRunes int
|
||||
}
|
||||
|
||||
func NewBTree(Value, Left, Right Matcher) (tree BTree) {
|
||||
tree.Value = Value
|
||||
tree.Left = Left
|
||||
tree.Right = Right
|
||||
|
||||
lenOk := true
|
||||
if tree.ValueLengthRunes = Value.Len(); tree.ValueLengthRunes == -1 {
|
||||
lenOk = false
|
||||
}
|
||||
|
||||
if Left != nil {
|
||||
if tree.LeftLengthRunes = Left.Len(); tree.LeftLengthRunes == -1 {
|
||||
lenOk = false
|
||||
}
|
||||
}
|
||||
|
||||
if Right != nil {
|
||||
if tree.RightLengthRunes = Right.Len(); tree.RightLengthRunes == -1 {
|
||||
lenOk = false
|
||||
}
|
||||
}
|
||||
|
||||
if lenOk {
|
||||
tree.LengthRunes = tree.LeftLengthRunes + tree.ValueLengthRunes + tree.RightLengthRunes
|
||||
} else {
|
||||
tree.LengthRunes = -1
|
||||
}
|
||||
|
||||
return tree
|
||||
}
|
||||
|
||||
func (self BTree) Len() int {
|
||||
return self.LengthRunes
|
||||
}
|
||||
|
||||
// todo?
|
||||
func (self BTree) Index(s string) (int, []int) {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self BTree) Match(s string) bool {
|
||||
inputLen := len(s)
|
||||
|
||||
// self.Length, self.RLen and self.LLen are values meaning the length of runes for each part
|
||||
// here we manipulating byte length for better optimizations
|
||||
// but these checks still works, cause minLen of 1-rune string is 1 byte.
|
||||
if self.LengthRunes != -1 && self.LengthRunes > inputLen {
|
||||
return false
|
||||
}
|
||||
|
||||
// try to cut unnecessary parts
|
||||
// by knowledge of length of right and left part
|
||||
var offset, limit int
|
||||
if self.LeftLengthRunes >= 0 {
|
||||
offset = self.LeftLengthRunes
|
||||
}
|
||||
if self.RightLengthRunes >= 0 {
|
||||
limit = inputLen - self.RightLengthRunes
|
||||
} else {
|
||||
limit = inputLen
|
||||
}
|
||||
|
||||
for offset < limit {
|
||||
// search for matching part in substring
|
||||
index, segments := self.Value.Index(s[offset:limit])
|
||||
if index == -1 {
|
||||
releaseSegments(segments)
|
||||
return false
|
||||
}
|
||||
|
||||
l := s[:offset+index]
|
||||
var left bool
|
||||
if self.Left != nil {
|
||||
left = self.Left.Match(l)
|
||||
} else {
|
||||
left = l == ""
|
||||
}
|
||||
|
||||
if left {
|
||||
for i := len(segments) - 1; i >= 0; i-- {
|
||||
length := segments[i]
|
||||
|
||||
var right bool
|
||||
var r string
|
||||
// if there is no string for the right branch
|
||||
if inputLen <= offset+index+length {
|
||||
r = ""
|
||||
} else {
|
||||
r = s[offset+index+length:]
|
||||
}
|
||||
|
||||
if self.Right != nil {
|
||||
right = self.Right.Match(r)
|
||||
} else {
|
||||
right = r == ""
|
||||
}
|
||||
|
||||
if right {
|
||||
releaseSegments(segments)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, step := utf8.DecodeRuneInString(s[offset+index:])
|
||||
offset += index + step
|
||||
|
||||
releaseSegments(segments)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (self BTree) String() string {
|
||||
const n string = "<nil>"
|
||||
var l, r string
|
||||
if self.Left == nil {
|
||||
l = n
|
||||
} else {
|
||||
l = self.Left.String()
|
||||
}
|
||||
if self.Right == nil {
|
||||
r = n
|
||||
} else {
|
||||
r = self.Right.String()
|
||||
}
|
||||
|
||||
return fmt.Sprintf("<btree:[%s<-%s->%s]>", l, self.Value, r)
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Contains struct {
|
||||
Needle string
|
||||
Not bool
|
||||
}
|
||||
|
||||
func NewContains(needle string, not bool) Contains {
|
||||
return Contains{needle, not}
|
||||
}
|
||||
|
||||
func (self Contains) Match(s string) bool {
|
||||
return strings.Contains(s, self.Needle) != self.Not
|
||||
}
|
||||
|
||||
func (self Contains) Index(s string) (int, []int) {
|
||||
var offset int
|
||||
|
||||
idx := strings.Index(s, self.Needle)
|
||||
|
||||
if !self.Not {
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
offset = idx + len(self.Needle)
|
||||
if len(s) <= offset {
|
||||
return 0, []int{offset}
|
||||
}
|
||||
s = s[offset:]
|
||||
} else if idx != -1 {
|
||||
s = s[:idx]
|
||||
}
|
||||
|
||||
segments := acquireSegments(len(s) + 1)
|
||||
for i := range s {
|
||||
segments = append(segments, offset+i)
|
||||
}
|
||||
|
||||
return 0, append(segments, offset+len(s))
|
||||
}
|
||||
|
||||
func (self Contains) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Contains) String() string {
|
||||
var not string
|
||||
if self.Not {
|
||||
not = "!"
|
||||
}
|
||||
return fmt.Sprintf("<contains:%s[%s]>", not, self.Needle)
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type EveryOf struct {
|
||||
Matchers Matchers
|
||||
}
|
||||
|
||||
func NewEveryOf(m ...Matcher) EveryOf {
|
||||
return EveryOf{Matchers(m)}
|
||||
}
|
||||
|
||||
func (self *EveryOf) Add(m Matcher) error {
|
||||
self.Matchers = append(self.Matchers, m)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (self EveryOf) Len() (l int) {
|
||||
for _, m := range self.Matchers {
|
||||
if ml := m.Len(); l > 0 {
|
||||
l += ml
|
||||
} else {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (self EveryOf) Index(s string) (int, []int) {
|
||||
var index int
|
||||
var offset int
|
||||
|
||||
// make `in` with cap as len(s),
|
||||
// cause it is the maximum size of output segments values
|
||||
next := acquireSegments(len(s))
|
||||
current := acquireSegments(len(s))
|
||||
|
||||
sub := s
|
||||
for i, m := range self.Matchers {
|
||||
idx, seg := m.Index(sub)
|
||||
if idx == -1 {
|
||||
releaseSegments(next)
|
||||
releaseSegments(current)
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
// we use copy here instead of `current = seg`
|
||||
// cause seg is a slice from reusable buffer `in`
|
||||
// and it could be overwritten in next iteration
|
||||
current = append(current, seg...)
|
||||
} else {
|
||||
// clear the next
|
||||
next = next[:0]
|
||||
|
||||
delta := index - (idx + offset)
|
||||
for _, ex := range current {
|
||||
for _, n := range seg {
|
||||
if ex+delta == n {
|
||||
next = append(next, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(next) == 0 {
|
||||
releaseSegments(next)
|
||||
releaseSegments(current)
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
current = append(current[:0], next...)
|
||||
}
|
||||
|
||||
index = idx + offset
|
||||
sub = s[index:]
|
||||
offset += idx
|
||||
}
|
||||
|
||||
releaseSegments(next)
|
||||
|
||||
return index, current
|
||||
}
|
||||
|
||||
func (self EveryOf) Match(s string) bool {
|
||||
for _, m := range self.Matchers {
|
||||
if !m.Match(s) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (self EveryOf) String() string {
|
||||
return fmt.Sprintf("<every_of:[%s]>", self.Matchers)
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type List struct {
|
||||
List []rune
|
||||
Not bool
|
||||
}
|
||||
|
||||
func NewList(list []rune, not bool) List {
|
||||
return List{list, not}
|
||||
}
|
||||
|
||||
func (self List) Match(s string) bool {
|
||||
r, w := utf8.DecodeRuneInString(s)
|
||||
if len(s) > w {
|
||||
return false
|
||||
}
|
||||
|
||||
inList := runes.IndexRune(self.List, r) != -1
|
||||
return inList == !self.Not
|
||||
}
|
||||
|
||||
func (self List) Len() int {
|
||||
return lenOne
|
||||
}
|
||||
|
||||
func (self List) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if self.Not == (runes.IndexRune(self.List, r) == -1) {
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self List) String() string {
|
||||
var not string
|
||||
if self.Not {
|
||||
not = "!"
|
||||
}
|
||||
|
||||
return fmt.Sprintf("<list:%s[%s]>", not, string(self.List))
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
package match
|
||||
|
||||
// todo common table of rune's length
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const lenOne = 1
|
||||
const lenZero = 0
|
||||
const lenNo = -1
|
||||
|
||||
type Matcher interface {
|
||||
Match(string) bool
|
||||
Index(string) (int, []int)
|
||||
Len() int
|
||||
String() string
|
||||
}
|
||||
|
||||
type Matchers []Matcher
|
||||
|
||||
func (m Matchers) String() string {
|
||||
var s []string
|
||||
for _, matcher := range m {
|
||||
s = append(s, fmt.Sprint(matcher))
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s", strings.Join(s, ","))
|
||||
}
|
||||
|
||||
// appendMerge merges and sorts given already SORTED and UNIQUE segments.
|
||||
func appendMerge(target, sub []int) []int {
|
||||
lt, ls := len(target), len(sub)
|
||||
out := make([]int, 0, lt+ls)
|
||||
|
||||
for x, y := 0, 0; x < lt || y < ls; {
|
||||
if x >= lt {
|
||||
out = append(out, sub[y:]...)
|
||||
break
|
||||
}
|
||||
|
||||
if y >= ls {
|
||||
out = append(out, target[x:]...)
|
||||
break
|
||||
}
|
||||
|
||||
xValue := target[x]
|
||||
yValue := sub[y]
|
||||
|
||||
switch {
|
||||
|
||||
case xValue == yValue:
|
||||
out = append(out, xValue)
|
||||
x++
|
||||
y++
|
||||
|
||||
case xValue < yValue:
|
||||
out = append(out, xValue)
|
||||
x++
|
||||
|
||||
case yValue < xValue:
|
||||
out = append(out, yValue)
|
||||
y++
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
target = append(target[:0], out...)
|
||||
|
||||
return target
|
||||
}
|
||||
|
||||
func reverseSegments(input []int) {
|
||||
l := len(input)
|
||||
m := l / 2
|
||||
|
||||
for i := 0; i < m; i++ {
|
||||
input[i], input[l-i-1] = input[l-i-1], input[i]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Max struct {
|
||||
Limit int
|
||||
}
|
||||
|
||||
func NewMax(l int) Max {
|
||||
return Max{l}
|
||||
}
|
||||
|
||||
func (self Max) Match(s string) bool {
|
||||
var l int
|
||||
for range s {
|
||||
l += 1
|
||||
if l > self.Limit {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (self Max) Index(s string) (int, []int) {
|
||||
segments := acquireSegments(self.Limit + 1)
|
||||
segments = append(segments, 0)
|
||||
var count int
|
||||
for i, r := range s {
|
||||
count++
|
||||
if count > self.Limit {
|
||||
break
|
||||
}
|
||||
segments = append(segments, i+utf8.RuneLen(r))
|
||||
}
|
||||
|
||||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Max) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Max) String() string {
|
||||
return fmt.Sprintf("<max:%d>", self.Limit)
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Min struct {
|
||||
Limit int
|
||||
}
|
||||
|
||||
func NewMin(l int) Min {
|
||||
return Min{l}
|
||||
}
|
||||
|
||||
func (self Min) Match(s string) bool {
|
||||
var l int
|
||||
for range s {
|
||||
l += 1
|
||||
if l >= self.Limit {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (self Min) Index(s string) (int, []int) {
|
||||
var count int
|
||||
|
||||
c := len(s) - self.Limit + 1
|
||||
if c <= 0 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
segments := acquireSegments(c)
|
||||
for i, r := range s {
|
||||
count++
|
||||
if count >= self.Limit {
|
||||
segments = append(segments, i+utf8.RuneLen(r))
|
||||
}
|
||||
}
|
||||
|
||||
if len(segments) == 0 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Min) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Min) String() string {
|
||||
return fmt.Sprintf("<min:%d>", self.Limit)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Nothing struct{}
|
||||
|
||||
func NewNothing() Nothing {
|
||||
return Nothing{}
|
||||
}
|
||||
|
||||
func (self Nothing) Match(s string) bool {
|
||||
return len(s) == 0
|
||||
}
|
||||
|
||||
func (self Nothing) Index(s string) (int, []int) {
|
||||
return 0, segments0
|
||||
}
|
||||
|
||||
func (self Nothing) Len() int {
|
||||
return lenZero
|
||||
}
|
||||
|
||||
func (self Nothing) String() string {
|
||||
return fmt.Sprintf("<nothing>")
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Prefix struct {
|
||||
Prefix string
|
||||
}
|
||||
|
||||
func NewPrefix(p string) Prefix {
|
||||
return Prefix{p}
|
||||
}
|
||||
|
||||
func (self Prefix) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Prefix)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
length := len(self.Prefix)
|
||||
var sub string
|
||||
if len(s) > idx+length {
|
||||
sub = s[idx+length:]
|
||||
} else {
|
||||
sub = ""
|
||||
}
|
||||
|
||||
segments := acquireSegments(len(sub) + 1)
|
||||
segments = append(segments, length)
|
||||
for i, r := range sub {
|
||||
segments = append(segments, length+i+utf8.RuneLen(r))
|
||||
}
|
||||
|
||||
return idx, segments
|
||||
}
|
||||
|
||||
func (self Prefix) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Prefix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, self.Prefix)
|
||||
}
|
||||
|
||||
func (self Prefix) String() string {
|
||||
return fmt.Sprintf("<prefix:%s>", self.Prefix)
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
sutil "github.com/gobwas/glob/util/strings"
|
||||
)
|
||||
|
||||
type PrefixAny struct {
|
||||
Prefix string
|
||||
Separators []rune
|
||||
}
|
||||
|
||||
func NewPrefixAny(s string, sep []rune) PrefixAny {
|
||||
return PrefixAny{s, sep}
|
||||
}
|
||||
|
||||
func (self PrefixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Prefix)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
n := len(self.Prefix)
|
||||
sub := s[idx+n:]
|
||||
i := sutil.IndexAnyRunes(sub, self.Separators)
|
||||
if i > -1 {
|
||||
sub = sub[:i]
|
||||
}
|
||||
|
||||
seg := acquireSegments(len(sub) + 1)
|
||||
seg = append(seg, n)
|
||||
for i, r := range sub {
|
||||
seg = append(seg, n+i+utf8.RuneLen(r))
|
||||
}
|
||||
|
||||
return idx, seg
|
||||
}
|
||||
|
||||
func (self PrefixAny) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self PrefixAny) Match(s string) bool {
|
||||
if !strings.HasPrefix(s, self.Prefix) {
|
||||
return false
|
||||
}
|
||||
return sutil.IndexAnyRunes(s[len(self.Prefix):], self.Separators) == -1
|
||||
}
|
||||
|
||||
func (self PrefixAny) String() string {
|
||||
return fmt.Sprintf("<prefix_any:%s![%s]>", self.Prefix, string(self.Separators))
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type PrefixSuffix struct {
|
||||
Prefix, Suffix string
|
||||
}
|
||||
|
||||
func NewPrefixSuffix(p, s string) PrefixSuffix {
|
||||
return PrefixSuffix{p, s}
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) Index(s string) (int, []int) {
|
||||
prefixIdx := strings.Index(s, self.Prefix)
|
||||
if prefixIdx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
suffixLen := len(self.Suffix)
|
||||
if suffixLen <= 0 {
|
||||
return prefixIdx, []int{len(s) - prefixIdx}
|
||||
}
|
||||
|
||||
if (len(s) - prefixIdx) <= 0 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
segments := acquireSegments(len(s) - prefixIdx)
|
||||
for sub := s[prefixIdx:]; ; {
|
||||
suffixIdx := strings.LastIndex(sub, self.Suffix)
|
||||
if suffixIdx == -1 {
|
||||
break
|
||||
}
|
||||
|
||||
segments = append(segments, suffixIdx+suffixLen)
|
||||
sub = sub[:suffixIdx]
|
||||
}
|
||||
|
||||
if len(segments) == 0 {
|
||||
releaseSegments(segments)
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
reverseSegments(segments)
|
||||
|
||||
return prefixIdx, segments
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) Match(s string) bool {
|
||||
return strings.HasPrefix(s, self.Prefix) && strings.HasSuffix(s, self.Suffix)
|
||||
}
|
||||
|
||||
func (self PrefixSuffix) String() string {
|
||||
return fmt.Sprintf("<prefix_suffix:[%s,%s]>", self.Prefix, self.Suffix)
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Range struct {
|
||||
Lo, Hi rune
|
||||
Not bool
|
||||
}
|
||||
|
||||
func NewRange(lo, hi rune, not bool) Range {
|
||||
return Range{lo, hi, not}
|
||||
}
|
||||
|
||||
func (self Range) Len() int {
|
||||
return lenOne
|
||||
}
|
||||
|
||||
func (self Range) Match(s string) bool {
|
||||
r, w := utf8.DecodeRuneInString(s)
|
||||
if len(s) > w {
|
||||
return false
|
||||
}
|
||||
|
||||
inRange := r >= self.Lo && r <= self.Hi
|
||||
|
||||
return inRange == !self.Not
|
||||
}
|
||||
|
||||
func (self Range) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if self.Not != (r >= self.Lo && r <= self.Hi) {
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self Range) String() string {
|
||||
var not string
|
||||
if self.Not {
|
||||
not = "!"
|
||||
}
|
||||
return fmt.Sprintf("<range:%s[%s,%s]>", not, string(self.Lo), string(self.Hi))
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Row struct {
|
||||
Matchers Matchers
|
||||
RunesLength int
|
||||
Segments []int
|
||||
}
|
||||
|
||||
func NewRow(len int, m ...Matcher) Row {
|
||||
return Row{
|
||||
Matchers: Matchers(m),
|
||||
RunesLength: len,
|
||||
Segments: []int{len},
|
||||
}
|
||||
}
|
||||
|
||||
func (self Row) matchAll(s string) bool {
|
||||
var idx int
|
||||
for _, m := range self.Matchers {
|
||||
length := m.Len()
|
||||
|
||||
var next, i int
|
||||
for next = range s[idx:] {
|
||||
i++
|
||||
if i == length {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if i < length || !m.Match(s[idx:idx+next+1]) {
|
||||
return false
|
||||
}
|
||||
|
||||
idx += next + 1
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (self Row) lenOk(s string) bool {
|
||||
var i int
|
||||
for range s {
|
||||
i++
|
||||
if i > self.RunesLength {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return self.RunesLength == i
|
||||
}
|
||||
|
||||
func (self Row) Match(s string) bool {
|
||||
return self.lenOk(s) && self.matchAll(s)
|
||||
}
|
||||
|
||||
func (self Row) Len() (l int) {
|
||||
return self.RunesLength
|
||||
}
|
||||
|
||||
func (self Row) Index(s string) (int, []int) {
|
||||
for i := range s {
|
||||
if len(s[i:]) < self.RunesLength {
|
||||
break
|
||||
}
|
||||
if self.matchAll(s[i:]) {
|
||||
return i, self.Segments
|
||||
}
|
||||
}
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self Row) String() string {
|
||||
return fmt.Sprintf("<row_%d:[%s]>", self.RunesLength, self.Matchers)
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
type SomePool interface {
|
||||
Get() []int
|
||||
Put([]int)
|
||||
}
|
||||
|
||||
var segmentsPools [1024]sync.Pool
|
||||
|
||||
func toPowerOfTwo(v int) int {
|
||||
v--
|
||||
v |= v >> 1
|
||||
v |= v >> 2
|
||||
v |= v >> 4
|
||||
v |= v >> 8
|
||||
v |= v >> 16
|
||||
v++
|
||||
|
||||
return v
|
||||
}
|
||||
|
||||
const (
|
||||
cacheFrom = 16
|
||||
cacheToAndHigher = 1024
|
||||
cacheFromIndex = 15
|
||||
cacheToAndHigherIndex = 1023
|
||||
)
|
||||
|
||||
var (
|
||||
segments0 = []int{0}
|
||||
segments1 = []int{1}
|
||||
segments2 = []int{2}
|
||||
segments3 = []int{3}
|
||||
segments4 = []int{4}
|
||||
)
|
||||
|
||||
var segmentsByRuneLength [5][]int = [5][]int{
|
||||
0: segments0,
|
||||
1: segments1,
|
||||
2: segments2,
|
||||
3: segments3,
|
||||
4: segments4,
|
||||
}
|
||||
|
||||
func init() {
|
||||
for i := cacheToAndHigher; i >= cacheFrom; i >>= 1 {
|
||||
func(i int) {
|
||||
segmentsPools[i-1] = sync.Pool{New: func() interface{} {
|
||||
return make([]int, 0, i)
|
||||
}}
|
||||
}(i)
|
||||
}
|
||||
}
|
||||
|
||||
func getTableIndex(c int) int {
|
||||
p := toPowerOfTwo(c)
|
||||
switch {
|
||||
case p >= cacheToAndHigher:
|
||||
return cacheToAndHigherIndex
|
||||
case p <= cacheFrom:
|
||||
return cacheFromIndex
|
||||
default:
|
||||
return p - 1
|
||||
}
|
||||
}
|
||||
|
||||
func acquireSegments(c int) []int {
|
||||
// make []int with less capacity than cacheFrom
|
||||
// is faster than acquiring it from pool
|
||||
if c < cacheFrom {
|
||||
return make([]int, 0, c)
|
||||
}
|
||||
|
||||
return segmentsPools[getTableIndex(c)].Get().([]int)[:0]
|
||||
}
|
||||
|
||||
func releaseSegments(s []int) {
|
||||
c := cap(s)
|
||||
|
||||
// make []int with less capacity than cacheFrom
|
||||
// is faster than acquiring it from pool
|
||||
if c < cacheFrom {
|
||||
return
|
||||
}
|
||||
|
||||
segmentsPools[getTableIndex(c)].Put(s)
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// single represents ?
|
||||
type Single struct {
|
||||
Separators []rune
|
||||
}
|
||||
|
||||
func NewSingle(s []rune) Single {
|
||||
return Single{s}
|
||||
}
|
||||
|
||||
func (self Single) Match(s string) bool {
|
||||
r, w := utf8.DecodeRuneInString(s)
|
||||
if len(s) > w {
|
||||
return false
|
||||
}
|
||||
|
||||
return runes.IndexRune(self.Separators, r) == -1
|
||||
}
|
||||
|
||||
func (self Single) Len() int {
|
||||
return lenOne
|
||||
}
|
||||
|
||||
func (self Single) Index(s string) (int, []int) {
|
||||
for i, r := range s {
|
||||
if runes.IndexRune(self.Separators, r) == -1 {
|
||||
return i, segmentsByRuneLength[utf8.RuneLen(r)]
|
||||
}
|
||||
}
|
||||
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
func (self Single) String() string {
|
||||
return fmt.Sprintf("<single:![%s]>", string(self.Separators))
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Suffix struct {
|
||||
Suffix string
|
||||
}
|
||||
|
||||
func NewSuffix(s string) Suffix {
|
||||
return Suffix{s}
|
||||
}
|
||||
|
||||
func (self Suffix) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Suffix) Match(s string) bool {
|
||||
return strings.HasSuffix(s, self.Suffix)
|
||||
}
|
||||
|
||||
func (self Suffix) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Suffix)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return 0, []int{idx + len(self.Suffix)}
|
||||
}
|
||||
|
||||
func (self Suffix) String() string {
|
||||
return fmt.Sprintf("<suffix:%s>", self.Suffix)
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
sutil "github.com/gobwas/glob/util/strings"
|
||||
)
|
||||
|
||||
type SuffixAny struct {
|
||||
Suffix string
|
||||
Separators []rune
|
||||
}
|
||||
|
||||
func NewSuffixAny(s string, sep []rune) SuffixAny {
|
||||
return SuffixAny{s, sep}
|
||||
}
|
||||
|
||||
func (self SuffixAny) Index(s string) (int, []int) {
|
||||
idx := strings.Index(s, self.Suffix)
|
||||
if idx == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
i := sutil.LastIndexAnyRunes(s[:idx], self.Separators) + 1
|
||||
|
||||
return i, []int{idx + len(self.Suffix) - i}
|
||||
}
|
||||
|
||||
func (self SuffixAny) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self SuffixAny) Match(s string) bool {
|
||||
if !strings.HasSuffix(s, self.Suffix) {
|
||||
return false
|
||||
}
|
||||
return sutil.IndexAnyRunes(s[:len(s)-len(self.Suffix)], self.Separators) == -1
|
||||
}
|
||||
|
||||
func (self SuffixAny) String() string {
|
||||
return fmt.Sprintf("<suffix_any:![%s]%s>", string(self.Separators), self.Suffix)
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Super struct{}
|
||||
|
||||
func NewSuper() Super {
|
||||
return Super{}
|
||||
}
|
||||
|
||||
func (self Super) Match(s string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (self Super) Len() int {
|
||||
return lenNo
|
||||
}
|
||||
|
||||
func (self Super) Index(s string) (int, []int) {
|
||||
segments := acquireSegments(len(s) + 1)
|
||||
for i := range s {
|
||||
segments = append(segments, i)
|
||||
}
|
||||
segments = append(segments, len(s))
|
||||
|
||||
return 0, segments
|
||||
}
|
||||
|
||||
func (self Super) String() string {
|
||||
return fmt.Sprintf("<super>")
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// raw represents raw string to match
|
||||
type Text struct {
|
||||
Str string
|
||||
RunesLength int
|
||||
BytesLength int
|
||||
Segments []int
|
||||
}
|
||||
|
||||
func NewText(s string) Text {
|
||||
return Text{
|
||||
Str: s,
|
||||
RunesLength: utf8.RuneCountInString(s),
|
||||
BytesLength: len(s),
|
||||
Segments: []int{len(s)},
|
||||
}
|
||||
}
|
||||
|
||||
func (self Text) Match(s string) bool {
|
||||
return self.Str == s
|
||||
}
|
||||
|
||||
func (self Text) Len() int {
|
||||
return self.RunesLength
|
||||
}
|
||||
|
||||
func (self Text) Index(s string) (int, []int) {
|
||||
index := strings.Index(s, self.Str)
|
||||
if index == -1 {
|
||||
return -1, nil
|
||||
}
|
||||
|
||||
return index, self.Segments
|
||||
}
|
||||
|
||||
func (self Text) String() string {
|
||||
return fmt.Sprintf("<text:`%v`>", self.Str)
|
||||
}
|
|
@ -0,0 +1,148 @@
|
|||
# glob.[go](https://golang.org)
|
||||
|
||||
[![GoDoc][godoc-image]][godoc-url] [![Build Status][travis-image]][travis-url]
|
||||
|
||||
> Go Globbing Library.
|
||||
|
||||
## Install
|
||||
|
||||
```shell
|
||||
go get github.com/gobwas/glob
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
```go
|
||||
|
||||
package main
|
||||
|
||||
import "github.com/gobwas/glob"
|
||||
|
||||
func main() {
|
||||
var g glob.Glob
|
||||
|
||||
// create simple glob
|
||||
g = glob.MustCompile("*.github.com")
|
||||
g.Match("api.github.com") // true
|
||||
|
||||
// quote meta characters and then create simple glob
|
||||
g = glob.MustCompile(glob.QuoteMeta("*.github.com"))
|
||||
g.Match("*.github.com") // true
|
||||
|
||||
// create new glob with set of delimiters as ["."]
|
||||
g = glob.MustCompile("api.*.com", '.')
|
||||
g.Match("api.github.com") // true
|
||||
g.Match("api.gi.hub.com") // false
|
||||
|
||||
// create new glob with set of delimiters as ["."]
|
||||
// but now with super wildcard
|
||||
g = glob.MustCompile("api.**.com", '.')
|
||||
g.Match("api.github.com") // true
|
||||
g.Match("api.gi.hub.com") // true
|
||||
|
||||
// create glob with single symbol wildcard
|
||||
g = glob.MustCompile("?at")
|
||||
g.Match("cat") // true
|
||||
g.Match("fat") // true
|
||||
g.Match("at") // false
|
||||
|
||||
// create glob with single symbol wildcard and delimiters ['f']
|
||||
g = glob.MustCompile("?at", 'f')
|
||||
g.Match("cat") // true
|
||||
g.Match("fat") // false
|
||||
g.Match("at") // false
|
||||
|
||||
// create glob with character-list matchers
|
||||
g = glob.MustCompile("[abc]at")
|
||||
g.Match("cat") // true
|
||||
g.Match("bat") // true
|
||||
g.Match("fat") // false
|
||||
g.Match("at") // false
|
||||
|
||||
// create glob with character-list matchers
|
||||
g = glob.MustCompile("[!abc]at")
|
||||
g.Match("cat") // false
|
||||
g.Match("bat") // false
|
||||
g.Match("fat") // true
|
||||
g.Match("at") // false
|
||||
|
||||
// create glob with character-range matchers
|
||||
g = glob.MustCompile("[a-c]at")
|
||||
g.Match("cat") // true
|
||||
g.Match("bat") // true
|
||||
g.Match("fat") // false
|
||||
g.Match("at") // false
|
||||
|
||||
// create glob with character-range matchers
|
||||
g = glob.MustCompile("[!a-c]at")
|
||||
g.Match("cat") // false
|
||||
g.Match("bat") // false
|
||||
g.Match("fat") // true
|
||||
g.Match("at") // false
|
||||
|
||||
// create glob with pattern-alternatives list
|
||||
g = glob.MustCompile("{cat,bat,[fr]at}")
|
||||
g.Match("cat") // true
|
||||
g.Match("bat") // true
|
||||
g.Match("fat") // true
|
||||
g.Match("rat") // true
|
||||
g.Match("at") // false
|
||||
g.Match("zat") // false
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
||||
This library is created for compile-once patterns. This means, that compilation could take time, but
|
||||
strings matching is done faster, than in case when always parsing template.
|
||||
|
||||
If you will not use compiled `glob.Glob` object, and do `g := glob.MustCompile(pattern); g.Match(...)` every time, then your code will be much more slower.
|
||||
|
||||
Run `go test -bench=.` from source root to see the benchmarks:
|
||||
|
||||
Pattern | Fixture | Match | Speed (ns/op)
|
||||
--------|---------|-------|--------------
|
||||
`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my cat has very bright eyes` | `true` | 432
|
||||
`[a-z][!a-x]*cat*[h][!b]*eyes*` | `my dog has very bright eyes` | `false` | 199
|
||||
`https://*.google.*` | `https://account.google.com` | `true` | 96
|
||||
`https://*.google.*` | `https://google.com` | `false` | 66
|
||||
`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://yahoo.com` | `true` | 163
|
||||
`{https://*.google.*,*yandex.*,*yahoo.*,*mail.ru}` | `http://google.com` | `false` | 197
|
||||
`{https://*gobwas.com,http://exclude.gobwas.com}` | `https://safe.gobwas.com` | `true` | 22
|
||||
`{https://*gobwas.com,http://exclude.gobwas.com}` | `http://safe.gobwas.com` | `false` | 24
|
||||
`abc*` | `abcdef` | `true` | 8.15
|
||||
`abc*` | `af` | `false` | 5.68
|
||||
`*def` | `abcdef` | `true` | 8.84
|
||||
`*def` | `af` | `false` | 5.74
|
||||
`ab*ef` | `abcdef` | `true` | 15.2
|
||||
`ab*ef` | `af` | `false` | 10.4
|
||||
|
||||
The same things with `regexp` package:
|
||||
|
||||
Pattern | Fixture | Match | Speed (ns/op)
|
||||
--------|---------|-------|--------------
|
||||
`^[a-z][^a-x].*cat.*[h][^b].*eyes.*$` | `my cat has very bright eyes` | `true` | 2553
|
||||
`^[a-z][^a-x].*cat.*[h][^b].*eyes.*$` | `my dog has very bright eyes` | `false` | 1383
|
||||
`^https:\/\/.*\.google\..*$` | `https://account.google.com` | `true` | 1205
|
||||
`^https:\/\/.*\.google\..*$` | `https://google.com` | `false` | 767
|
||||
`^(https:\/\/.*\.google\..*|.*yandex\..*|.*yahoo\..*|.*mail\.ru)$` | `http://yahoo.com` | `true` | 1435
|
||||
`^(https:\/\/.*\.google\..*|.*yandex\..*|.*yahoo\..*|.*mail\.ru)$` | `http://google.com` | `false` | 1674
|
||||
`^(https:\/\/.*gobwas\.com|http://exclude.gobwas.com)$` | `https://safe.gobwas.com` | `true` | 1039
|
||||
`^(https:\/\/.*gobwas\.com|http://exclude.gobwas.com)$` | `http://safe.gobwas.com` | `false` | 272
|
||||
`^abc.*$` | `abcdef` | `true` | 237
|
||||
`^abc.*$` | `af` | `false` | 100
|
||||
`^.*def$` | `abcdef` | `true` | 464
|
||||
`^.*def$` | `af` | `false` | 265
|
||||
`^ab.*ef$` | `abcdef` | `true` | 375
|
||||
`^ab.*ef$` | `af` | `false` | 145
|
||||
|
||||
[godoc-image]: https://godoc.org/github.com/gobwas/glob?status.svg
|
||||
[godoc-url]: https://godoc.org/github.com/gobwas/glob
|
||||
[travis-image]: https://travis-ci.org/gobwas/glob.svg?branch=master
|
||||
[travis-url]: https://travis-ci.org/gobwas/glob
|
||||
|
||||
## Syntax
|
||||
|
||||
Syntax is inspired by [standard wildcards](http://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm),
|
||||
except that `**` is aka super-asterisk, that do not sensitive for separators.
|
|
@ -0,0 +1,122 @@
|
|||
package ast
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type Node struct {
|
||||
Parent *Node
|
||||
Children []*Node
|
||||
Value interface{}
|
||||
Kind Kind
|
||||
}
|
||||
|
||||
func NewNode(k Kind, v interface{}, ch ...*Node) *Node {
|
||||
n := &Node{
|
||||
Kind: k,
|
||||
Value: v,
|
||||
}
|
||||
for _, c := range ch {
|
||||
Insert(n, c)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (a *Node) Equal(b *Node) bool {
|
||||
if a.Kind != b.Kind {
|
||||
return false
|
||||
}
|
||||
if a.Value != b.Value {
|
||||
return false
|
||||
}
|
||||
if len(a.Children) != len(b.Children) {
|
||||
return false
|
||||
}
|
||||
for i, c := range a.Children {
|
||||
if !c.Equal(b.Children[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (a *Node) String() string {
|
||||
var buf bytes.Buffer
|
||||
buf.WriteString(a.Kind.String())
|
||||
if a.Value != nil {
|
||||
buf.WriteString(" =")
|
||||
buf.WriteString(fmt.Sprintf("%v", a.Value))
|
||||
}
|
||||
if len(a.Children) > 0 {
|
||||
buf.WriteString(" [")
|
||||
for i, c := range a.Children {
|
||||
if i > 0 {
|
||||
buf.WriteString(", ")
|
||||
}
|
||||
buf.WriteString(c.String())
|
||||
}
|
||||
buf.WriteString("]")
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func Insert(parent *Node, children ...*Node) {
|
||||
parent.Children = append(parent.Children, children...)
|
||||
for _, ch := range children {
|
||||
ch.Parent = parent
|
||||
}
|
||||
}
|
||||
|
||||
type List struct {
|
||||
Not bool
|
||||
Chars string
|
||||
}
|
||||
|
||||
type Range struct {
|
||||
Not bool
|
||||
Lo, Hi rune
|
||||
}
|
||||
|
||||
type Text struct {
|
||||
Text string
|
||||
}
|
||||
|
||||
type Kind int
|
||||
|
||||
const (
|
||||
KindNothing Kind = iota
|
||||
KindPattern
|
||||
KindList
|
||||
KindRange
|
||||
KindText
|
||||
KindAny
|
||||
KindSuper
|
||||
KindSingle
|
||||
KindAnyOf
|
||||
)
|
||||
|
||||
func (k Kind) String() string {
|
||||
switch k {
|
||||
case KindNothing:
|
||||
return "Nothing"
|
||||
case KindPattern:
|
||||
return "Pattern"
|
||||
case KindList:
|
||||
return "List"
|
||||
case KindRange:
|
||||
return "Range"
|
||||
case KindText:
|
||||
return "Text"
|
||||
case KindAny:
|
||||
return "Any"
|
||||
case KindSuper:
|
||||
return "Super"
|
||||
case KindSingle:
|
||||
return "Single"
|
||||
case KindAnyOf:
|
||||
return "AnyOf"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
|
@ -0,0 +1,157 @@
|
|||
package ast
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/syntax/lexer"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type Lexer interface {
|
||||
Next() lexer.Token
|
||||
}
|
||||
|
||||
type parseFn func(*Node, Lexer) (parseFn, *Node, error)
|
||||
|
||||
func Parse(lexer Lexer) (*Node, error) {
|
||||
var parser parseFn
|
||||
|
||||
root := NewNode(KindPattern, nil)
|
||||
|
||||
var (
|
||||
tree *Node
|
||||
err error
|
||||
)
|
||||
for parser, tree = parserMain, root; parser != nil; {
|
||||
parser, tree, err = parser(tree, lexer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return root, nil
|
||||
}
|
||||
|
||||
func parserMain(tree *Node, lex Lexer) (parseFn, *Node, error) {
|
||||
for {
|
||||
token := lex.Next()
|
||||
switch token.Type {
|
||||
case lexer.EOF:
|
||||
return nil, tree, nil
|
||||
|
||||
case lexer.Error:
|
||||
return nil, tree, errors.New(token.Raw)
|
||||
|
||||
case lexer.Text:
|
||||
Insert(tree, NewNode(KindText, Text{token.Raw}))
|
||||
return parserMain, tree, nil
|
||||
|
||||
case lexer.Any:
|
||||
Insert(tree, NewNode(KindAny, nil))
|
||||
return parserMain, tree, nil
|
||||
|
||||
case lexer.Super:
|
||||
Insert(tree, NewNode(KindSuper, nil))
|
||||
return parserMain, tree, nil
|
||||
|
||||
case lexer.Single:
|
||||
Insert(tree, NewNode(KindSingle, nil))
|
||||
return parserMain, tree, nil
|
||||
|
||||
case lexer.RangeOpen:
|
||||
return parserRange, tree, nil
|
||||
|
||||
case lexer.TermsOpen:
|
||||
a := NewNode(KindAnyOf, nil)
|
||||
Insert(tree, a)
|
||||
|
||||
p := NewNode(KindPattern, nil)
|
||||
Insert(a, p)
|
||||
|
||||
return parserMain, p, nil
|
||||
|
||||
case lexer.Separator:
|
||||
p := NewNode(KindPattern, nil)
|
||||
Insert(tree.Parent, p)
|
||||
|
||||
return parserMain, p, nil
|
||||
|
||||
case lexer.TermsClose:
|
||||
return parserMain, tree.Parent.Parent, nil
|
||||
|
||||
default:
|
||||
return nil, tree, fmt.Errorf("unexpected token: %s", token)
|
||||
}
|
||||
}
|
||||
return nil, tree, fmt.Errorf("unknown error")
|
||||
}
|
||||
|
||||
func parserRange(tree *Node, lex Lexer) (parseFn, *Node, error) {
|
||||
var (
|
||||
not bool
|
||||
lo rune
|
||||
hi rune
|
||||
chars string
|
||||
)
|
||||
for {
|
||||
token := lex.Next()
|
||||
switch token.Type {
|
||||
case lexer.EOF:
|
||||
return nil, tree, errors.New("unexpected end")
|
||||
|
||||
case lexer.Error:
|
||||
return nil, tree, errors.New(token.Raw)
|
||||
|
||||
case lexer.Not:
|
||||
not = true
|
||||
|
||||
case lexer.RangeLo:
|
||||
r, w := utf8.DecodeRuneInString(token.Raw)
|
||||
if len(token.Raw) > w {
|
||||
return nil, tree, fmt.Errorf("unexpected length of lo character")
|
||||
}
|
||||
lo = r
|
||||
|
||||
case lexer.RangeBetween:
|
||||
//
|
||||
|
||||
case lexer.RangeHi:
|
||||
r, w := utf8.DecodeRuneInString(token.Raw)
|
||||
if len(token.Raw) > w {
|
||||
return nil, tree, fmt.Errorf("unexpected length of lo character")
|
||||
}
|
||||
|
||||
hi = r
|
||||
|
||||
if hi < lo {
|
||||
return nil, tree, fmt.Errorf("hi character '%s' should be greater than lo '%s'", string(hi), string(lo))
|
||||
}
|
||||
|
||||
case lexer.Text:
|
||||
chars = token.Raw
|
||||
|
||||
case lexer.RangeClose:
|
||||
isRange := lo != 0 && hi != 0
|
||||
isChars := chars != ""
|
||||
|
||||
if isChars == isRange {
|
||||
return nil, tree, fmt.Errorf("could not parse range")
|
||||
}
|
||||
|
||||
if isRange {
|
||||
Insert(tree, NewNode(KindRange, Range{
|
||||
Lo: lo,
|
||||
Hi: hi,
|
||||
Not: not,
|
||||
}))
|
||||
} else {
|
||||
Insert(tree, NewNode(KindList, List{
|
||||
Chars: chars,
|
||||
Not: not,
|
||||
}))
|
||||
}
|
||||
|
||||
return parserMain, tree, nil
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,273 @@
|
|||
package lexer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/gobwas/glob/util/runes"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
char_any = '*'
|
||||
char_comma = ','
|
||||
char_single = '?'
|
||||
char_escape = '\\'
|
||||
char_range_open = '['
|
||||
char_range_close = ']'
|
||||
char_terms_open = '{'
|
||||
char_terms_close = '}'
|
||||
char_range_not = '!'
|
||||
char_range_between = '-'
|
||||
)
|
||||
|
||||
var specials = []byte{
|
||||
char_any,
|
||||
char_single,
|
||||
char_escape,
|
||||
char_range_open,
|
||||
char_range_close,
|
||||
char_terms_open,
|
||||
char_terms_close,
|
||||
}
|
||||
|
||||
func Special(c byte) bool {
|
||||
return bytes.IndexByte(specials, c) != -1
|
||||
}
|
||||
|
||||
type tokens []Token
|
||||
|
||||
func (i *tokens) shift() (ret Token) {
|
||||
ret = (*i)[0]
|
||||
copy(*i, (*i)[1:])
|
||||
*i = (*i)[:len(*i)-1]
|
||||
return
|
||||
}
|
||||
|
||||
func (i *tokens) push(v Token) {
|
||||
*i = append(*i, v)
|
||||
}
|
||||
|
||||
func (i *tokens) empty() bool {
|
||||
return len(*i) == 0
|
||||
}
|
||||
|
||||
var eof rune = 0
|
||||
|
||||
type lexer struct {
|
||||
data string
|
||||
pos int
|
||||
err error
|
||||
|
||||
tokens tokens
|
||||
termsLevel int
|
||||
|
||||
lastRune rune
|
||||
lastRuneSize int
|
||||
hasRune bool
|
||||
}
|
||||
|
||||
func NewLexer(source string) *lexer {
|
||||
l := &lexer{
|
||||
data: source,
|
||||
tokens: tokens(make([]Token, 0, 4)),
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func (l *lexer) Next() Token {
|
||||
if l.err != nil {
|
||||
return Token{Error, l.err.Error()}
|
||||
}
|
||||
if !l.tokens.empty() {
|
||||
return l.tokens.shift()
|
||||
}
|
||||
|
||||
l.fetchItem()
|
||||
return l.Next()
|
||||
}
|
||||
|
||||
func (l *lexer) peek() (r rune, w int) {
|
||||
if l.pos == len(l.data) {
|
||||
return eof, 0
|
||||
}
|
||||
|
||||
r, w = utf8.DecodeRuneInString(l.data[l.pos:])
|
||||
if r == utf8.RuneError {
|
||||
l.errorf("could not read rune")
|
||||
r = eof
|
||||
w = 0
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (l *lexer) read() rune {
|
||||
if l.hasRune {
|
||||
l.hasRune = false
|
||||
l.seek(l.lastRuneSize)
|
||||
return l.lastRune
|
||||
}
|
||||
|
||||
r, s := l.peek()
|
||||
l.seek(s)
|
||||
|
||||
l.lastRune = r
|
||||
l.lastRuneSize = s
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
func (l *lexer) seek(w int) {
|
||||
l.pos += w
|
||||
}
|
||||
|
||||
func (l *lexer) unread() {
|
||||
if l.hasRune {
|
||||
l.errorf("could not unread rune")
|
||||
return
|
||||
}
|
||||
l.seek(-l.lastRuneSize)
|
||||
l.hasRune = true
|
||||
}
|
||||
|
||||
func (l *lexer) errorf(f string, v ...interface{}) {
|
||||
l.err = fmt.Errorf(f, v...)
|
||||
}
|
||||
|
||||
func (l *lexer) inTerms() bool {
|
||||
return l.termsLevel > 0
|
||||
}
|
||||
|
||||
func (l *lexer) termsEnter() {
|
||||
l.termsLevel++
|
||||
}
|
||||
|
||||
func (l *lexer) termsLeave() {
|
||||
l.termsLevel--
|
||||
}
|
||||
|
||||
var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
|
||||
var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
|
||||
|
||||
func (l *lexer) fetchItem() {
|
||||
r := l.read()
|
||||
switch {
|
||||
case r == eof:
|
||||
l.tokens.push(Token{EOF, ""})
|
||||
|
||||
case r == char_terms_open:
|
||||
l.termsEnter()
|
||||
l.tokens.push(Token{TermsOpen, string(r)})
|
||||
|
||||
case r == char_comma && l.inTerms():
|
||||
l.tokens.push(Token{Separator, string(r)})
|
||||
|
||||
case r == char_terms_close && l.inTerms():
|
||||
l.tokens.push(Token{TermsClose, string(r)})
|
||||
l.termsLeave()
|
||||
|
||||
case r == char_range_open:
|
||||
l.tokens.push(Token{RangeOpen, string(r)})
|
||||
l.fetchRange()
|
||||
|
||||
case r == char_single:
|
||||
l.tokens.push(Token{Single, string(r)})
|
||||
|
||||
case r == char_any:
|
||||
if l.read() == char_any {
|
||||
l.tokens.push(Token{Super, string(r) + string(r)})
|
||||
} else {
|
||||
l.unread()
|
||||
l.tokens.push(Token{Any, string(r)})
|
||||
}
|
||||
|
||||
default:
|
||||
l.unread()
|
||||
|
||||
var breakers []rune
|
||||
if l.inTerms() {
|
||||
breakers = inTermsBreakers
|
||||
} else {
|
||||
breakers = inTextBreakers
|
||||
}
|
||||
l.fetchText(breakers)
|
||||
}
|
||||
}
|
||||
|
||||
func (l *lexer) fetchRange() {
|
||||
var wantHi bool
|
||||
var wantClose bool
|
||||
var seenNot bool
|
||||
for {
|
||||
r := l.read()
|
||||
if r == eof {
|
||||
l.errorf("unexpected end of input")
|
||||
return
|
||||
}
|
||||
|
||||
if wantClose {
|
||||
if r != char_range_close {
|
||||
l.errorf("expected close range character")
|
||||
} else {
|
||||
l.tokens.push(Token{RangeClose, string(r)})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if wantHi {
|
||||
l.tokens.push(Token{RangeHi, string(r)})
|
||||
wantClose = true
|
||||
continue
|
||||
}
|
||||
|
||||
if !seenNot && r == char_range_not {
|
||||
l.tokens.push(Token{Not, string(r)})
|
||||
seenNot = true
|
||||
continue
|
||||
}
|
||||
|
||||
if n, w := l.peek(); n == char_range_between {
|
||||
l.seek(w)
|
||||
l.tokens.push(Token{RangeLo, string(r)})
|
||||
l.tokens.push(Token{RangeBetween, string(n)})
|
||||
wantHi = true
|
||||
continue
|
||||
}
|
||||
|
||||
l.unread() // unread first peek and fetch as text
|
||||
l.fetchText([]rune{char_range_close})
|
||||
wantClose = true
|
||||
}
|
||||
}
|
||||
|
||||
func (l *lexer) fetchText(breakers []rune) {
|
||||
var data []rune
|
||||
var escaped bool
|
||||
|
||||
reading:
|
||||
for {
|
||||
r := l.read()
|
||||
if r == eof {
|
||||
break
|
||||
}
|
||||
|
||||
if !escaped {
|
||||
if r == char_escape {
|
||||
escaped = true
|
||||
continue
|
||||
}
|
||||
|
||||
if runes.IndexRune(breakers, r) != -1 {
|
||||
l.unread()
|
||||
break reading
|
||||
}
|
||||
}
|
||||
|
||||
escaped = false
|
||||
data = append(data, r)
|
||||
}
|
||||
|
||||
if len(data) > 0 {
|
||||
l.tokens.push(Token{Text, string(data)})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
package lexer
|
||||
|
||||
import "fmt"
|
||||
|
||||
type TokenType int
|
||||
|
||||
const (
|
||||
EOF TokenType = iota
|
||||
Error
|
||||
Text
|
||||
Char
|
||||
Any
|
||||
Super
|
||||
Single
|
||||
Not
|
||||
Separator
|
||||
RangeOpen
|
||||
RangeClose
|
||||
RangeLo
|
||||
RangeHi
|
||||
RangeBetween
|
||||
TermsOpen
|
||||
TermsClose
|
||||
)
|
||||
|
||||
func (tt TokenType) String() string {
|
||||
switch tt {
|
||||
case EOF:
|
||||
return "eof"
|
||||
|
||||
case Error:
|
||||
return "error"
|
||||
|
||||
case Text:
|
||||
return "text"
|
||||
|
||||
case Char:
|
||||
return "char"
|
||||
|
||||
case Any:
|
||||
return "any"
|
||||
|
||||
case Super:
|
||||
return "super"
|
||||
|
||||
case Single:
|
||||
return "single"
|
||||
|
||||
case Not:
|
||||
return "not"
|
||||
|
||||
case Separator:
|
||||
return "separator"
|
||||
|
||||
case RangeOpen:
|
||||
return "range_open"
|
||||
|
||||
case RangeClose:
|
||||
return "range_close"
|
||||
|
||||
case RangeLo:
|
||||
return "range_lo"
|
||||
|
||||
case RangeHi:
|
||||
return "range_hi"
|
||||
|
||||
case RangeBetween:
|
||||
return "range_between"
|
||||
|
||||
case TermsOpen:
|
||||
return "terms_open"
|
||||
|
||||
case TermsClose:
|
||||
return "terms_close"
|
||||
|
||||
default:
|
||||
return "undef"
|
||||
}
|
||||
}
|
||||
|
||||
type Token struct {
|
||||
Type TokenType
|
||||
Raw string
|
||||
}
|
||||
|
||||
func (t Token) String() string {
|
||||
return fmt.Sprintf("%v<%q>", t.Type, t.Raw)
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
package syntax
|
||||
|
||||
import (
|
||||
"github.com/gobwas/glob/syntax/ast"
|
||||
"github.com/gobwas/glob/syntax/lexer"
|
||||
)
|
||||
|
||||
func Parse(s string) (*ast.Node, error) {
|
||||
return ast.Parse(lexer.NewLexer(s))
|
||||
}
|
||||
|
||||
func Special(b byte) bool {
|
||||
return lexer.Special(b)
|
||||
}
|
|
@ -0,0 +1,154 @@
|
|||
package runes
|
||||
|
||||
func Index(s, needle []rune) int {
|
||||
ls, ln := len(s), len(needle)
|
||||
|
||||
switch {
|
||||
case ln == 0:
|
||||
return 0
|
||||
case ln == 1:
|
||||
return IndexRune(s, needle[0])
|
||||
case ln == ls:
|
||||
if Equal(s, needle) {
|
||||
return 0
|
||||
}
|
||||
return -1
|
||||
case ln > ls:
|
||||
return -1
|
||||
}
|
||||
|
||||
head:
|
||||
for i := 0; i < ls && ls-i >= ln; i++ {
|
||||
for y := 0; y < ln; y++ {
|
||||
if s[i+y] != needle[y] {
|
||||
continue head
|
||||
}
|
||||
}
|
||||
|
||||
return i
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
func LastIndex(s, needle []rune) int {
|
||||
ls, ln := len(s), len(needle)
|
||||
|
||||
switch {
|
||||
case ln == 0:
|
||||
if ls == 0 {
|
||||
return 0
|
||||
}
|
||||
return ls
|
||||
case ln == 1:
|
||||
return IndexLastRune(s, needle[0])
|
||||
case ln == ls:
|
||||
if Equal(s, needle) {
|
||||
return 0
|
||||
}
|
||||
return -1
|
||||
case ln > ls:
|
||||
return -1
|
||||
}
|
||||
|
||||
head:
|
||||
for i := ls - 1; i >= 0 && i >= ln; i-- {
|
||||
for y := ln - 1; y >= 0; y-- {
|
||||
if s[i-(ln-y-1)] != needle[y] {
|
||||
continue head
|
||||
}
|
||||
}
|
||||
|
||||
return i - ln + 1
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
// IndexAny returns the index of the first instance of any Unicode code point
|
||||
// from chars in s, or -1 if no Unicode code point from chars is present in s.
|
||||
func IndexAny(s, chars []rune) int {
|
||||
if len(chars) > 0 {
|
||||
for i, c := range s {
|
||||
for _, m := range chars {
|
||||
if c == m {
|
||||
return i
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func Contains(s, needle []rune) bool {
|
||||
return Index(s, needle) >= 0
|
||||
}
|
||||
|
||||
func Max(s []rune) (max rune) {
|
||||
for _, r := range s {
|
||||
if r > max {
|
||||
max = r
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func Min(s []rune) rune {
|
||||
min := rune(-1)
|
||||
for _, r := range s {
|
||||
if min == -1 {
|
||||
min = r
|
||||
continue
|
||||
}
|
||||
|
||||
if r < min {
|
||||
min = r
|
||||
}
|
||||
}
|
||||
|
||||
return min
|
||||
}
|
||||
|
||||
func IndexRune(s []rune, r rune) int {
|
||||
for i, c := range s {
|
||||
if c == r {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func IndexLastRune(s []rune, r rune) int {
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
if s[i] == r {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
func Equal(a, b []rune) bool {
|
||||
if len(a) == len(b) {
|
||||
for i := 0; i < len(a); i++ {
|
||||
if a[i] != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// HasPrefix tests whether the string s begins with prefix.
|
||||
func HasPrefix(s, prefix []rune) bool {
|
||||
return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
|
||||
}
|
||||
|
||||
// HasSuffix tests whether the string s ends with suffix.
|
||||
func HasSuffix(s, suffix []rune) bool {
|
||||
return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package strings
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func IndexAnyRunes(s string, rs []rune) int {
|
||||
for _, r := range rs {
|
||||
if i := strings.IndexRune(s, r); i != -1 {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
func LastIndexAnyRunes(s string, rs []rune) int {
|
||||
for _, r := range rs {
|
||||
i := -1
|
||||
if 0 <= r && r < utf8.RuneSelf {
|
||||
i = strings.LastIndexByte(s, byte(r))
|
||||
} else {
|
||||
sub := s
|
||||
for len(sub) > 0 {
|
||||
j := strings.IndexRune(s, r)
|
||||
if j == -1 {
|
||||
break
|
||||
}
|
||||
i = j
|
||||
sub = sub[i+1:]
|
||||
}
|
||||
}
|
||||
if i != -1 {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
comment: false
|
|
@ -0,0 +1,17 @@
|
|||
language: go
|
||||
sudo: false
|
||||
go:
|
||||
- 1.9.x
|
||||
- 1.10.x
|
||||
- 1.11.x
|
||||
- tip
|
||||
script:
|
||||
- go get -u golang.org/x/lint/golint
|
||||
- OUT="$(go get -a)"; test -z "$OUT" || (echo "$OUT" && return 1)
|
||||
- OUT="$(gofmt -l -d ./)"; test -z "$OUT" || (echo "$OUT" && return 1)
|
||||
- OUT="$(golint ./...)"; test -z "$OUT" || (echo "$OUT" && return 1)
|
||||
- go vet -v ./...
|
||||
- go test -race -v -coverprofile=coverage.txt -covermode=atomic ./
|
||||
- go build
|
||||
after_success:
|
||||
- bash <(curl -s https://codecov.io/bash)
|
|
@ -0,0 +1,23 @@
|
|||
# 1.2.0 - 2019.02.13
|
||||
|
||||
- Compatibility with the latest htmlquery package
|
||||
- New request shortcut for HEAD requests
|
||||
- Check URL availibility before visiting
|
||||
- Fix proxy URL value
|
||||
- Request counter fix
|
||||
- Minor fixes in examples
|
||||
|
||||
# 1.1.0 - 2018.08.13
|
||||
|
||||
- Appengine integration takes context.Context instead of http.Request (API change)
|
||||
- Added "Accept" http header by default to every request
|
||||
- Support slices of pointers in unmarshal
|
||||
- Fixed a race condition in queues
|
||||
- ForEachWithBreak method added to HTMLElement
|
||||
- Added a local file example
|
||||
- Support gzip decompression of response bodies
|
||||
- Don't share waitgroup when cloning a collector
|
||||
- Fixed instagram example
|
||||
|
||||
|
||||
# 1.0.0 - 2018.05.13
|
|
@ -0,0 +1,67 @@
|
|||
# Contribute
|
||||
|
||||
## Introduction
|
||||
|
||||
First, thank you for considering contributing to colly! It's people like you that make the open source community such a great community! 😊
|
||||
|
||||
We welcome any type of contribution, not only code. You can help with
|
||||
- **QA**: file bug reports, the more details you can give the better (e.g. screenshots with the console open)
|
||||
- **Marketing**: writing blog posts, howto's, printing stickers, ...
|
||||
- **Community**: presenting the project at meetups, organizing a dedicated meetup for the local community, ...
|
||||
- **Code**: take a look at the [open issues](https://github.com/gocolly/colly/issues). Even if you can't write code, commenting on them, showing that you care about a given issue matters. It helps us triage them.
|
||||
- **Money**: we welcome financial contributions in full transparency on our [open collective](https://opencollective.com/colly).
|
||||
|
||||
## Your First Contribution
|
||||
|
||||
Working on your first Pull Request? You can learn how from this *free* series, [How to Contribute to an Open Source Project on GitHub](https://egghead.io/series/how-to-contribute-to-an-open-source-project-on-github).
|
||||
|
||||
## Submitting code
|
||||
|
||||
Any code change should be submitted as a pull request. The description should explain what the code does and give steps to execute it. The pull request should also contain tests.
|
||||
|
||||
## Code review process
|
||||
|
||||
The bigger the pull request, the longer it will take to review and merge. Try to break down large pull requests in smaller chunks that are easier to review and merge.
|
||||
It is also always helpful to have some context for your pull request. What was the purpose? Why does it matter to you?
|
||||
|
||||
## Financial contributions
|
||||
|
||||
We also welcome financial contributions in full transparency on our [open collective](https://opencollective.com/colly).
|
||||
Anyone can file an expense. If the expense makes sense for the development of the community, it will be "merged" in the ledger of our open collective by the core contributors and the person who filed the expense will be reimbursed.
|
||||
|
||||
## Questions
|
||||
|
||||
If you have any questions, create an [issue](https://github.com/gocolly/colly/issues/new) (protip: do a quick search first to see if someone else didn't ask the same question before!).
|
||||
You can also reach us at hello@colly.opencollective.com.
|
||||
|
||||
## Credits
|
||||
|
||||
### Contributors
|
||||
|
||||
Thank you to all the people who have already contributed to colly!
|
||||
<a href="graphs/contributors"><img src="https://opencollective.com/colly/contributors.svg?width=890" /></a>
|
||||
|
||||
|
||||
### Backers
|
||||
|
||||
Thank you to all our backers! [[Become a backer](https://opencollective.com/colly#backer)]
|
||||
|
||||
<a href="https://opencollective.com/colly#backers" target="_blank"><img src="https://opencollective.com/colly/backers.svg?width=890"></a>
|
||||
|
||||
|
||||
### Sponsors
|
||||
|
||||
Thank you to all our sponsors! (please ask your company to also support this open source project by [becoming a sponsor](https://opencollective.com/colly#sponsor))
|
||||
|
||||
<a href="https://opencollective.com/colly/sponsor/0/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/0/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/1/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/1/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/2/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/2/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/3/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/3/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/4/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/4/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/5/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/5/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/6/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/6/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/7/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/7/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/8/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/8/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/9/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/9/avatar.svg"></a>
|
||||
|
||||
<!-- This `CONTRIBUTING.md` is based on @nayafia's template https://github.com/nayafia/contributing-template -->
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,112 @@
|
|||
# Colly
|
||||
|
||||
Lightning Fast and Elegant Scraping Framework for Gophers
|
||||
|
||||
Colly provides a clean interface to write any kind of crawler/scraper/spider.
|
||||
|
||||
With Colly you can easily extract structured data from websites, which can be used for a wide range of applications, like data mining, data processing or archiving.
|
||||
|
||||
[![GoDoc](https://godoc.org/github.com/gocolly/colly?status.svg)](https://godoc.org/github.com/gocolly/colly)
|
||||
[![Backers on Open Collective](https://opencollective.com/colly/backers/badge.svg)](#backers) [![Sponsors on Open Collective](https://opencollective.com/colly/sponsors/badge.svg)](#sponsors) [![build status](https://img.shields.io/travis/gocolly/colly/master.svg?style=flat-square)](https://travis-ci.org/gocolly/colly)
|
||||
[![report card](https://img.shields.io/badge/report%20card-a%2B-ff3333.svg?style=flat-square)](http://goreportcard.com/report/gocolly/colly)
|
||||
[![view examples](https://img.shields.io/badge/learn%20by-examples-0077b3.svg?style=flat-square)](https://github.com/gocolly/colly/tree/master/_examples)
|
||||
[![Code Coverage](https://img.shields.io/codecov/c/github/gocolly/colly/master.svg)](https://codecov.io/github/gocolly/colly?branch=master)
|
||||
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fgocolly%2Fcolly.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Fgocolly%2Fcolly?ref=badge_shield)
|
||||
[![Twitter URL](https://img.shields.io/badge/twitter-follow-green.svg)](https://twitter.com/gocolly)
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
* Clean API
|
||||
* Fast (>1k request/sec on a single core)
|
||||
* Manages request delays and maximum concurrency per domain
|
||||
* Automatic cookie and session handling
|
||||
* Sync/async/parallel scraping
|
||||
* Caching
|
||||
* Automatic encoding of non-unicode responses
|
||||
* Robots.txt support
|
||||
* Distributed scraping
|
||||
* Configuration via environment variables
|
||||
* Extensions
|
||||
|
||||
|
||||
## Example
|
||||
|
||||
```go
|
||||
func main() {
|
||||
c := colly.NewCollector()
|
||||
|
||||
// Find and visit all links
|
||||
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
|
||||
e.Request.Visit(e.Attr("href"))
|
||||
})
|
||||
|
||||
c.OnRequest(func(r *colly.Request) {
|
||||
fmt.Println("Visiting", r.URL)
|
||||
})
|
||||
|
||||
c.Visit("http://go-colly.org/")
|
||||
}
|
||||
```
|
||||
|
||||
See [examples folder](https://github.com/gocolly/colly/tree/master/_examples) for more detailed examples.
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
go get -u github.com/gocolly/colly/...
|
||||
```
|
||||
|
||||
|
||||
## Bugs
|
||||
|
||||
Bugs or suggestions? Visit the [issue tracker](https://github.com/gocolly/colly/issues) or join `#colly` on freenode
|
||||
|
||||
|
||||
## Other Projects Using Colly
|
||||
|
||||
Below is a list of public, open source projects that use Colly:
|
||||
|
||||
* [greenpeace/check-my-pages](https://github.com/greenpeace/check-my-pages) Scraping script to test the Spanish Greenpeace web archive
|
||||
* [altsab/gowap](https://github.com/altsab/gowap) Wappalyzer implementation in Go
|
||||
* [jesuiscamille/goquotes](https://github.com/jesuiscamille/goquotes) A quotes scrapper, making your day a little better!
|
||||
* [jivesearch/jivesearch](https://github.com/jivesearch/jivesearch) A search engine that doesn't track you.
|
||||
* [Leagify/colly-draft-prospects](https://github.com/Leagify/colly-draft-prospects) A scraper for future NFL Draft prospects.
|
||||
* [lucasepe/go-ps4](https://github.com/lucasepe/go-ps4) Search playstation store for your favorite PS4 games using the command line.
|
||||
|
||||
If you are using Colly in a project please send a pull request to add it to the list.
|
||||
|
||||
## Contributors
|
||||
|
||||
This project exists thanks to all the people who contribute. [[Contribute]](CONTRIBUTING.md).
|
||||
<a href="https://github.com/gocolly/colly/graphs/contributors"><img src="https://opencollective.com/colly/contributors.svg?width=890" /></a>
|
||||
|
||||
|
||||
## Backers
|
||||
|
||||
Thank you to all our backers! 🙏 [[Become a backer](https://opencollective.com/colly#backer)]
|
||||
|
||||
<a href="https://opencollective.com/colly#backers" target="_blank"><img src="https://opencollective.com/colly/backers.svg?width=890"></a>
|
||||
|
||||
|
||||
## Sponsors
|
||||
|
||||
Support this project by becoming a sponsor. Your logo will show up here with a link to your website. [[Become a sponsor](https://opencollective.com/colly#sponsor)]
|
||||
|
||||
<a href="https://opencollective.com/colly/sponsor/0/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/0/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/1/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/1/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/2/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/2/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/3/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/3/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/4/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/4/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/5/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/5/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/6/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/6/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/7/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/7/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/8/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/8/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/colly/sponsor/9/website" target="_blank"><img src="https://opencollective.com/colly/sponsor/9/avatar.svg"></a>
|
||||
|
||||
|
||||
|
||||
|
||||
## License
|
||||
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Fgocolly%2Fcolly.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2Fgocolly%2Fcolly?ref=badge_large)
|
|
@ -0,0 +1 @@
|
|||
1.2.0
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,87 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Context provides a tiny layer for passing data between callbacks
|
||||
type Context struct {
|
||||
contextMap map[string]interface{}
|
||||
lock *sync.RWMutex
|
||||
}
|
||||
|
||||
// NewContext initializes a new Context instance
|
||||
func NewContext() *Context {
|
||||
return &Context{
|
||||
contextMap: make(map[string]interface{}),
|
||||
lock: &sync.RWMutex{},
|
||||
}
|
||||
}
|
||||
|
||||
// UnmarshalBinary decodes Context value to nil
|
||||
// This function is used by request caching
|
||||
func (c *Context) UnmarshalBinary(_ []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalBinary encodes Context value
|
||||
// This function is used by request caching
|
||||
func (c *Context) MarshalBinary() (_ []byte, _ error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Put stores a value of any type in Context
|
||||
func (c *Context) Put(key string, value interface{}) {
|
||||
c.lock.Lock()
|
||||
c.contextMap[key] = value
|
||||
c.lock.Unlock()
|
||||
}
|
||||
|
||||
// Get retrieves a string value from Context.
|
||||
// Get returns an empty string if key not found
|
||||
func (c *Context) Get(key string) string {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
if v, ok := c.contextMap[key]; ok {
|
||||
return v.(string)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// GetAny retrieves a value from Context.
|
||||
// GetAny returns nil if key not found
|
||||
func (c *Context) GetAny(key string) interface{} {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
if v, ok := c.contextMap[key]; ok {
|
||||
return v
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ForEach iterate context
|
||||
func (c *Context) ForEach(fn func(k string, v interface{}) interface{}) []interface{} {
|
||||
c.lock.RLock()
|
||||
defer c.lock.RUnlock()
|
||||
|
||||
ret := make([]interface{}, 0, len(c.contextMap))
|
||||
for k, v := range c.contextMap {
|
||||
ret = append(ret, fn(k, v))
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package debug
|
||||
|
||||
// Event represents an action inside a collector
|
||||
type Event struct {
|
||||
// Type is the type of the event
|
||||
Type string
|
||||
// RequestID identifies the HTTP request of the Event
|
||||
RequestID uint32
|
||||
// CollectorID identifies the collector of the Event
|
||||
CollectorID uint32
|
||||
// Values contains the event's key-value pairs. Different type of events
|
||||
// can return different key-value pairs
|
||||
Values map[string]string
|
||||
}
|
||||
|
||||
// Debugger is an interface for different type of debugging backends
|
||||
type Debugger interface {
|
||||
// Init initializes the backend
|
||||
Init() error
|
||||
// Event receives a new collector event.
|
||||
Event(e *Event)
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package debug
|
||||
|
||||
import (
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// LogDebugger is the simplest debugger which prints log messages to the STDERR
|
||||
type LogDebugger struct {
|
||||
// Output is the log destination, anything can be used which implements them
|
||||
// io.Writer interface. Leave it blank to use STDERR
|
||||
Output io.Writer
|
||||
// Prefix appears at the beginning of each generated log line
|
||||
Prefix string
|
||||
// Flag defines the logging properties.
|
||||
Flag int
|
||||
logger *log.Logger
|
||||
counter int32
|
||||
start time.Time
|
||||
}
|
||||
|
||||
// Init initializes the LogDebugger
|
||||
func (l *LogDebugger) Init() error {
|
||||
l.counter = 0
|
||||
l.start = time.Now()
|
||||
if l.Output == nil {
|
||||
l.Output = os.Stderr
|
||||
}
|
||||
l.logger = log.New(l.Output, l.Prefix, l.Flag)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Event receives Collector events and prints them to STDERR
|
||||
func (l *LogDebugger) Event(e *Event) {
|
||||
i := atomic.AddInt32(&l.counter, 1)
|
||||
l.logger.Printf("[%06d] %d [%6d - %s] %q (%s)\n", i, e.CollectorID, e.RequestID, e.Type, e.Values, time.Since(l.start))
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package debug
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// WebDebugger is a web based debuging frontend for colly
|
||||
type WebDebugger struct {
|
||||
// Address is the address of the web server. It is 127.0.0.1:7676 by default.
|
||||
Address string
|
||||
initialized bool
|
||||
CurrentRequests map[uint32]requestInfo
|
||||
RequestLog []requestInfo
|
||||
}
|
||||
|
||||
type requestInfo struct {
|
||||
URL string
|
||||
Started time.Time
|
||||
Duration time.Duration
|
||||
ResponseStatus string
|
||||
ID uint32
|
||||
CollectorID uint32
|
||||
}
|
||||
|
||||
// Init initializes the WebDebugger
|
||||
func (w *WebDebugger) Init() error {
|
||||
if w.initialized {
|
||||
return nil
|
||||
}
|
||||
defer func() {
|
||||
w.initialized = true
|
||||
}()
|
||||
if w.Address == "" {
|
||||
w.Address = "127.0.0.1:7676"
|
||||
}
|
||||
w.RequestLog = make([]requestInfo, 0)
|
||||
w.CurrentRequests = make(map[uint32]requestInfo)
|
||||
http.HandleFunc("/", w.indexHandler)
|
||||
http.HandleFunc("/status", w.statusHandler)
|
||||
log.Println("Starting debug webserver on", w.Address)
|
||||
go http.ListenAndServe(w.Address, nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Event updates the debugger's status
|
||||
func (w *WebDebugger) Event(e *Event) {
|
||||
switch e.Type {
|
||||
case "request":
|
||||
w.CurrentRequests[e.RequestID] = requestInfo{
|
||||
URL: e.Values["url"],
|
||||
Started: time.Now(),
|
||||
ID: e.RequestID,
|
||||
CollectorID: e.CollectorID,
|
||||
}
|
||||
case "response", "error":
|
||||
r := w.CurrentRequests[e.RequestID]
|
||||
r.Duration = time.Since(r.Started)
|
||||
r.ResponseStatus = e.Values["status"]
|
||||
w.RequestLog = append(w.RequestLog, r)
|
||||
delete(w.CurrentRequests, e.RequestID)
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WebDebugger) indexHandler(wr http.ResponseWriter, r *http.Request) {
|
||||
wr.Write([]byte(`<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Colly Debugger WebUI</title>
|
||||
<script src="https://code.jquery.com/jquery-latest.min.js" type="text/javascript"></script>
|
||||
<link rel="stylesheet" type="text/css" href="https://semantic-ui.com/dist/semantic.min.css">
|
||||
</head>
|
||||
<body>
|
||||
<div class="ui inverted vertical masthead center aligned segment" id="menu">
|
||||
<div class="ui tiny secondary inverted menu">
|
||||
<a class="item" href="/"><b>Colly WebDebugger</b></a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="ui grid container">
|
||||
<div class="row">
|
||||
<div class="eight wide column">
|
||||
<h1>Current Requests <span id="current_request_count"></span></h1>
|
||||
<div id="current_requests" class="ui small feed"></div>
|
||||
</div>
|
||||
<div class="eight wide column">
|
||||
<h1>Finished Requests <span id="request_log_count"></span></h1>
|
||||
<div id="request_log" class="ui small feed"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
function curRequestTpl(url, started, collectorId) {
|
||||
return '<div class="event"><div class="content"><div class="summary">' + url + '</div><div class="meta">Collector #' + collectorId + ' - ' + started + "</div></div></div>";
|
||||
}
|
||||
function requestLogTpl(url, duration, collectorId) {
|
||||
return '<div class="event"><div class="content"><div class="summary">' + url + '</div><div class="meta">Collector #' + collectorId + ' - ' + (duration/1000000000) + "s</div></div></div>";
|
||||
}
|
||||
function fetchStatus() {
|
||||
$.getJSON("/status", function(data) {
|
||||
$("#current_requests").html("");
|
||||
$("#request_log").html("");
|
||||
$("#current_request_count").text('(' + Object.keys(data.CurrentRequests).length + ')');
|
||||
$("#request_log_count").text('(' + data.RequestLog.length + ')');
|
||||
for(var i in data.CurrentRequests) {
|
||||
var r = data.CurrentRequests[i];
|
||||
$("#current_requests").append(curRequestTpl(r.Url, r.Started, r.CollectorId));
|
||||
}
|
||||
for(var i in data.RequestLog.reverse()) {
|
||||
var r = data.RequestLog[i];
|
||||
$("#request_log").append(requestLogTpl(r.Url, r.Duration, r.CollectorId));
|
||||
}
|
||||
setTimeout(fetchStatus, 1000);
|
||||
});
|
||||
}
|
||||
$(document).ready(function() {
|
||||
fetchStatus();
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
`))
|
||||
}
|
||||
|
||||
func (w *WebDebugger) statusHandler(wr http.ResponseWriter, r *http.Request) {
|
||||
jsonData, err := json.MarshalIndent(w, "", " ")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wr.Write(jsonData)
|
||||
}
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// HTMLElement is the representation of a HTML tag.
|
||||
type HTMLElement struct {
|
||||
// Name is the name of the tag
|
||||
Name string
|
||||
Text string
|
||||
attributes []html.Attribute
|
||||
// Request is the request object of the element's HTML document
|
||||
Request *Request
|
||||
// Response is the Response object of the element's HTML document
|
||||
Response *Response
|
||||
// DOM is the goquery parsed DOM object of the page. DOM is relative
|
||||
// to the current HTMLElement
|
||||
DOM *goquery.Selection
|
||||
// Index stores the position of the current element within all the elements matched by an OnHTML callback
|
||||
Index int
|
||||
}
|
||||
|
||||
// NewHTMLElementFromSelectionNode creates a HTMLElement from a goquery.Selection Node.
|
||||
func NewHTMLElementFromSelectionNode(resp *Response, s *goquery.Selection, n *html.Node, idx int) *HTMLElement {
|
||||
return &HTMLElement{
|
||||
Name: n.Data,
|
||||
Request: resp.Request,
|
||||
Response: resp,
|
||||
Text: goquery.NewDocumentFromNode(n).Text(),
|
||||
DOM: s,
|
||||
Index: idx,
|
||||
attributes: n.Attr,
|
||||
}
|
||||
}
|
||||
|
||||
// Attr returns the selected attribute of a HTMLElement or empty string
|
||||
// if no attribute found
|
||||
func (h *HTMLElement) Attr(k string) string {
|
||||
for _, a := range h.attributes {
|
||||
if a.Key == k {
|
||||
return a.Val
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// ChildText returns the concatenated and stripped text content of the matching
|
||||
// elements.
|
||||
func (h *HTMLElement) ChildText(goquerySelector string) string {
|
||||
return strings.TrimSpace(h.DOM.Find(goquerySelector).Text())
|
||||
}
|
||||
|
||||
// ChildAttr returns the stripped text content of the first matching
|
||||
// element's attribute.
|
||||
func (h *HTMLElement) ChildAttr(goquerySelector, attrName string) string {
|
||||
if attr, ok := h.DOM.Find(goquerySelector).Attr(attrName); ok {
|
||||
return strings.TrimSpace(attr)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// ChildAttrs returns the stripped text content of all the matching
|
||||
// element's attributes.
|
||||
func (h *HTMLElement) ChildAttrs(goquerySelector, attrName string) []string {
|
||||
var res []string
|
||||
h.DOM.Find(goquerySelector).Each(func(_ int, s *goquery.Selection) {
|
||||
if attr, ok := s.Attr(attrName); ok {
|
||||
res = append(res, strings.TrimSpace(attr))
|
||||
}
|
||||
})
|
||||
return res
|
||||
}
|
||||
|
||||
// ForEach iterates over the elements matched by the first argument
|
||||
// and calls the callback function on every HTMLElement match.
|
||||
func (h *HTMLElement) ForEach(goquerySelector string, callback func(int, *HTMLElement)) {
|
||||
i := 0
|
||||
h.DOM.Find(goquerySelector).Each(func(_ int, s *goquery.Selection) {
|
||||
for _, n := range s.Nodes {
|
||||
callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n, i))
|
||||
i++
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ForEachWithBreak iterates over the elements matched by the first argument
|
||||
// and calls the callback function on every HTMLElement match.
|
||||
// It is identical to ForEach except that it is possible to break
|
||||
// out of the loop by returning false in the callback function. It returns the
|
||||
// current Selection object.
|
||||
func (h *HTMLElement) ForEachWithBreak(goquerySelector string, callback func(int, *HTMLElement) bool) {
|
||||
i := 0
|
||||
h.DOM.Find(goquerySelector).EachWithBreak(func(_ int, s *goquery.Selection) bool {
|
||||
for _, n := range s.Nodes {
|
||||
if callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n, i)) {
|
||||
i++
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
})
|
||||
}
|
|
@ -0,0 +1,227 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"encoding/gob"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"regexp"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"compress/gzip"
|
||||
|
||||
"github.com/gobwas/glob"
|
||||
)
|
||||
|
||||
type httpBackend struct {
|
||||
LimitRules []*LimitRule
|
||||
Client *http.Client
|
||||
lock *sync.RWMutex
|
||||
}
|
||||
|
||||
// LimitRule provides connection restrictions for domains.
|
||||
// Both DomainRegexp and DomainGlob can be used to specify
|
||||
// the included domains patterns, but at least one is required.
|
||||
// There can be two kind of limitations:
|
||||
// - Parallelism: Set limit for the number of concurrent requests to matching domains
|
||||
// - Delay: Wait specified amount of time between requests (parallelism is 1 in this case)
|
||||
type LimitRule struct {
|
||||
// DomainRegexp is a regular expression to match against domains
|
||||
DomainRegexp string
|
||||
// DomainRegexp is a glob pattern to match against domains
|
||||
DomainGlob string
|
||||
// Delay is the duration to wait before creating a new request to the matching domains
|
||||
Delay time.Duration
|
||||
// RandomDelay is the extra randomized duration to wait added to Delay before creating a new request
|
||||
RandomDelay time.Duration
|
||||
// Parallelism is the number of the maximum allowed concurrent requests of the matching domains
|
||||
Parallelism int
|
||||
waitChan chan bool
|
||||
compiledRegexp *regexp.Regexp
|
||||
compiledGlob glob.Glob
|
||||
}
|
||||
|
||||
// Init initializes the private members of LimitRule
|
||||
func (r *LimitRule) Init() error {
|
||||
waitChanSize := 1
|
||||
if r.Parallelism > 1 {
|
||||
waitChanSize = r.Parallelism
|
||||
}
|
||||
r.waitChan = make(chan bool, waitChanSize)
|
||||
hasPattern := false
|
||||
if r.DomainRegexp != "" {
|
||||
c, err := regexp.Compile(r.DomainRegexp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.compiledRegexp = c
|
||||
hasPattern = true
|
||||
}
|
||||
if r.DomainGlob != "" {
|
||||
c, err := glob.Compile(r.DomainGlob)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.compiledGlob = c
|
||||
hasPattern = true
|
||||
}
|
||||
if !hasPattern {
|
||||
return ErrNoPattern
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *httpBackend) Init(jar http.CookieJar) {
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
h.Client = &http.Client{
|
||||
Jar: jar,
|
||||
Timeout: 10 * time.Second,
|
||||
}
|
||||
h.lock = &sync.RWMutex{}
|
||||
}
|
||||
|
||||
// Match checks that the domain parameter triggers the rule
|
||||
func (r *LimitRule) Match(domain string) bool {
|
||||
match := false
|
||||
if r.compiledRegexp != nil && r.compiledRegexp.MatchString(domain) {
|
||||
match = true
|
||||
}
|
||||
if r.compiledGlob != nil && r.compiledGlob.Match(domain) {
|
||||
match = true
|
||||
}
|
||||
return match
|
||||
}
|
||||
|
||||
func (h *httpBackend) GetMatchingRule(domain string) *LimitRule {
|
||||
if h.LimitRules == nil {
|
||||
return nil
|
||||
}
|
||||
h.lock.RLock()
|
||||
defer h.lock.RUnlock()
|
||||
for _, r := range h.LimitRules {
|
||||
if r.Match(domain) {
|
||||
return r
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *httpBackend) Cache(request *http.Request, bodySize int, cacheDir string) (*Response, error) {
|
||||
if cacheDir == "" || request.Method != "GET" {
|
||||
return h.Do(request, bodySize)
|
||||
}
|
||||
sum := sha1.Sum([]byte(request.URL.String()))
|
||||
hash := hex.EncodeToString(sum[:])
|
||||
dir := path.Join(cacheDir, hash[:2])
|
||||
filename := path.Join(dir, hash)
|
||||
if file, err := os.Open(filename); err == nil {
|
||||
resp := new(Response)
|
||||
err := gob.NewDecoder(file).Decode(resp)
|
||||
file.Close()
|
||||
if resp.StatusCode < 500 {
|
||||
return resp, err
|
||||
}
|
||||
}
|
||||
resp, err := h.Do(request, bodySize)
|
||||
if err != nil || resp.StatusCode >= 500 {
|
||||
return resp, err
|
||||
}
|
||||
if _, err := os.Stat(dir); err != nil {
|
||||
if err := os.MkdirAll(dir, 0750); err != nil {
|
||||
return resp, err
|
||||
}
|
||||
}
|
||||
file, err := os.Create(filename + "~")
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
if err := gob.NewEncoder(file).Encode(resp); err != nil {
|
||||
file.Close()
|
||||
return resp, err
|
||||
}
|
||||
file.Close()
|
||||
return resp, os.Rename(filename+"~", filename)
|
||||
}
|
||||
|
||||
func (h *httpBackend) Do(request *http.Request, bodySize int) (*Response, error) {
|
||||
r := h.GetMatchingRule(request.URL.Host)
|
||||
if r != nil {
|
||||
r.waitChan <- true
|
||||
defer func(r *LimitRule) {
|
||||
randomDelay := time.Duration(0)
|
||||
if r.RandomDelay != 0 {
|
||||
randomDelay = time.Duration(rand.Int63n(int64(r.RandomDelay)))
|
||||
}
|
||||
time.Sleep(r.Delay + randomDelay)
|
||||
<-r.waitChan
|
||||
}(r)
|
||||
}
|
||||
|
||||
res, err := h.Client.Do(request)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if res.Request != nil {
|
||||
*request = *res.Request
|
||||
}
|
||||
|
||||
var bodyReader io.Reader = res.Body
|
||||
if bodySize > 0 {
|
||||
bodyReader = io.LimitReader(bodyReader, int64(bodySize))
|
||||
}
|
||||
if !res.Uncompressed && res.Header.Get("Content-Encoding") == "gzip" {
|
||||
bodyReader, err = gzip.NewReader(bodyReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
body, err := ioutil.ReadAll(bodyReader)
|
||||
defer res.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Response{
|
||||
StatusCode: res.StatusCode,
|
||||
Body: body,
|
||||
Headers: &res.Header,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (h *httpBackend) Limit(rule *LimitRule) error {
|
||||
h.lock.Lock()
|
||||
if h.LimitRules == nil {
|
||||
h.LimitRules = make([]*LimitRule, 0, 8)
|
||||
}
|
||||
h.LimitRules = append(h.LimitRules, rule)
|
||||
h.lock.Unlock()
|
||||
return rule.Init()
|
||||
}
|
||||
|
||||
func (h *httpBackend) Limits(rules []*LimitRule) error {
|
||||
for _, r := range rules {
|
||||
if err := h.Limit(r); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,180 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Request is the representation of a HTTP request made by a Collector
|
||||
type Request struct {
|
||||
// URL is the parsed URL of the HTTP request
|
||||
URL *url.URL
|
||||
// Headers contains the Request's HTTP headers
|
||||
Headers *http.Header
|
||||
// Ctx is a context between a Request and a Response
|
||||
Ctx *Context
|
||||
// Depth is the number of the parents of the request
|
||||
Depth int
|
||||
// Method is the HTTP method of the request
|
||||
Method string
|
||||
// Body is the request body which is used on POST/PUT requests
|
||||
Body io.Reader
|
||||
// ResponseCharacterencoding is the character encoding of the response body.
|
||||
// Leave it blank to allow automatic character encoding of the response body.
|
||||
// It is empty by default and it can be set in OnRequest callback.
|
||||
ResponseCharacterEncoding string
|
||||
// ID is the Unique identifier of the request
|
||||
ID uint32
|
||||
collector *Collector
|
||||
abort bool
|
||||
baseURL *url.URL
|
||||
// ProxyURL is the proxy address that handles the request
|
||||
ProxyURL string
|
||||
}
|
||||
|
||||
type serializableRequest struct {
|
||||
URL string
|
||||
Method string
|
||||
Body []byte
|
||||
ID uint32
|
||||
Ctx map[string]interface{}
|
||||
Headers http.Header
|
||||
}
|
||||
|
||||
// New creates a new request with the context of the original request
|
||||
func (r *Request) New(method, URL string, body io.Reader) (*Request, error) {
|
||||
u, err := url.Parse(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Request{
|
||||
Method: method,
|
||||
URL: u,
|
||||
Body: body,
|
||||
Ctx: r.Ctx,
|
||||
Headers: &http.Header{},
|
||||
ID: atomic.AddUint32(&r.collector.requestCount, 1),
|
||||
collector: r.collector,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Abort cancels the HTTP request when called in an OnRequest callback
|
||||
func (r *Request) Abort() {
|
||||
r.abort = true
|
||||
}
|
||||
|
||||
// AbsoluteURL returns with the resolved absolute URL of an URL chunk.
|
||||
// AbsoluteURL returns empty string if the URL chunk is a fragment or
|
||||
// could not be parsed
|
||||
func (r *Request) AbsoluteURL(u string) string {
|
||||
if strings.HasPrefix(u, "#") {
|
||||
return ""
|
||||
}
|
||||
var base *url.URL
|
||||
if r.baseURL != nil {
|
||||
base = r.baseURL
|
||||
} else {
|
||||
base = r.URL
|
||||
}
|
||||
absURL, err := base.Parse(u)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
absURL.Fragment = ""
|
||||
if absURL.Scheme == "//" {
|
||||
absURL.Scheme = r.URL.Scheme
|
||||
}
|
||||
return absURL.String()
|
||||
}
|
||||
|
||||
// Visit continues Collector's collecting job by creating a
|
||||
// request and preserves the Context of the previous request.
|
||||
// Visit also calls the previously provided callbacks
|
||||
func (r *Request) Visit(URL string) error {
|
||||
return r.collector.scrape(r.AbsoluteURL(URL), "GET", r.Depth+1, nil, r.Ctx, nil, true)
|
||||
}
|
||||
|
||||
// Post continues a collector job by creating a POST request and preserves the Context
|
||||
// of the previous request.
|
||||
// Post also calls the previously provided callbacks
|
||||
func (r *Request) Post(URL string, requestData map[string]string) error {
|
||||
return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, createFormReader(requestData), r.Ctx, nil, true)
|
||||
}
|
||||
|
||||
// PostRaw starts a collector job by creating a POST request with raw binary data.
|
||||
// PostRaw preserves the Context of the previous request
|
||||
// and calls the previously provided callbacks
|
||||
func (r *Request) PostRaw(URL string, requestData []byte) error {
|
||||
return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, bytes.NewReader(requestData), r.Ctx, nil, true)
|
||||
}
|
||||
|
||||
// PostMultipart starts a collector job by creating a Multipart POST request
|
||||
// with raw binary data. PostMultipart also calls the previously provided.
|
||||
// callbacks
|
||||
func (r *Request) PostMultipart(URL string, requestData map[string][]byte) error {
|
||||
boundary := randomBoundary()
|
||||
hdr := http.Header{}
|
||||
hdr.Set("Content-Type", "multipart/form-data; boundary="+boundary)
|
||||
hdr.Set("User-Agent", r.collector.UserAgent)
|
||||
return r.collector.scrape(r.AbsoluteURL(URL), "POST", r.Depth+1, createMultipartReader(boundary, requestData), r.Ctx, hdr, true)
|
||||
}
|
||||
|
||||
// Retry submits HTTP request again with the same parameters
|
||||
func (r *Request) Retry() error {
|
||||
return r.collector.scrape(r.URL.String(), r.Method, r.Depth, r.Body, r.Ctx, *r.Headers, false)
|
||||
}
|
||||
|
||||
// Do submits the request
|
||||
func (r *Request) Do() error {
|
||||
return r.collector.scrape(r.URL.String(), r.Method, r.Depth, r.Body, r.Ctx, *r.Headers, !r.collector.AllowURLRevisit)
|
||||
}
|
||||
|
||||
// Marshal serializes the Request
|
||||
func (r *Request) Marshal() ([]byte, error) {
|
||||
ctx := make(map[string]interface{})
|
||||
if r.Ctx != nil {
|
||||
r.Ctx.ForEach(func(k string, v interface{}) interface{} {
|
||||
ctx[k] = v
|
||||
return nil
|
||||
})
|
||||
}
|
||||
var err error
|
||||
var body []byte
|
||||
if r.Body != nil {
|
||||
body, err = ioutil.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
sr := &serializableRequest{
|
||||
URL: r.URL.String(),
|
||||
Method: r.Method,
|
||||
Body: body,
|
||||
ID: r.ID,
|
||||
Ctx: ctx,
|
||||
}
|
||||
if r.Headers != nil {
|
||||
sr.Headers = *r.Headers
|
||||
}
|
||||
return json.Marshal(sr)
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"mime"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/saintfish/chardet"
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// Response is the representation of a HTTP response made by a Collector
|
||||
type Response struct {
|
||||
// StatusCode is the status code of the Response
|
||||
StatusCode int
|
||||
// Body is the content of the Response
|
||||
Body []byte
|
||||
// Ctx is a context between a Request and a Response
|
||||
Ctx *Context
|
||||
// Request is the Request object of the response
|
||||
Request *Request
|
||||
// Headers contains the Response's HTTP headers
|
||||
Headers *http.Header
|
||||
}
|
||||
|
||||
// Save writes response body to disk
|
||||
func (r *Response) Save(fileName string) error {
|
||||
return ioutil.WriteFile(fileName, r.Body, 0644)
|
||||
}
|
||||
|
||||
// FileName returns the sanitized file name parsed from "Content-Disposition"
|
||||
// header or from URL
|
||||
func (r *Response) FileName() string {
|
||||
_, params, err := mime.ParseMediaType(r.Headers.Get("Content-Disposition"))
|
||||
if fName, ok := params["filename"]; ok && err == nil {
|
||||
return SanitizeFileName(fName)
|
||||
}
|
||||
if r.Request.URL.RawQuery != "" {
|
||||
return SanitizeFileName(fmt.Sprintf("%s_%s", r.Request.URL.Path, r.Request.URL.RawQuery))
|
||||
}
|
||||
return SanitizeFileName(strings.TrimPrefix(r.Request.URL.Path, "/"))
|
||||
}
|
||||
|
||||
func (r *Response) fixCharset(detectCharset bool, defaultEncoding string) error {
|
||||
if defaultEncoding != "" {
|
||||
tmpBody, err := encodeBytes(r.Body, "text/plain; charset="+defaultEncoding)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.Body = tmpBody
|
||||
return nil
|
||||
}
|
||||
contentType := strings.ToLower(r.Headers.Get("Content-Type"))
|
||||
if !strings.Contains(contentType, "charset") {
|
||||
if !detectCharset {
|
||||
return nil
|
||||
}
|
||||
d := chardet.NewTextDetector()
|
||||
r, err := d.DetectBest(r.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
contentType = "text/plain; charset=" + r.Charset
|
||||
}
|
||||
if strings.Contains(contentType, "utf-8") || strings.Contains(contentType, "utf8") {
|
||||
return nil
|
||||
}
|
||||
tmpBody, err := encodeBytes(r.Body, contentType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.Body = tmpBody
|
||||
return nil
|
||||
}
|
||||
|
||||
func encodeBytes(b []byte, contentType string) ([]byte, error) {
|
||||
r, err := charset.NewReader(bytes.NewReader(b), contentType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ioutil.ReadAll(r)
|
||||
}
|
|
@ -0,0 +1,128 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package storage
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Storage is an interface which handles Collector's internal data,
|
||||
// like visited urls and cookies.
|
||||
// The default Storage of the Collector is the InMemoryStorage.
|
||||
// Collector's storage can be changed by calling Collector.SetStorage()
|
||||
// function.
|
||||
type Storage interface {
|
||||
// Init initializes the storage
|
||||
Init() error
|
||||
// Visited receives and stores a request ID that is visited by the Collector
|
||||
Visited(requestID uint64) error
|
||||
// IsVisited returns true if the request was visited before IsVisited
|
||||
// is called
|
||||
IsVisited(requestID uint64) (bool, error)
|
||||
// Cookies retrieves stored cookies for a given host
|
||||
Cookies(u *url.URL) string
|
||||
// SetCookies stores cookies for a given host
|
||||
SetCookies(u *url.URL, cookies string)
|
||||
}
|
||||
|
||||
// InMemoryStorage is the default storage backend of colly.
|
||||
// InMemoryStorage keeps cookies and visited urls in memory
|
||||
// without persisting data on the disk.
|
||||
type InMemoryStorage struct {
|
||||
visitedURLs map[uint64]bool
|
||||
lock *sync.RWMutex
|
||||
jar *cookiejar.Jar
|
||||
}
|
||||
|
||||
// Init initializes InMemoryStorage
|
||||
func (s *InMemoryStorage) Init() error {
|
||||
if s.visitedURLs == nil {
|
||||
s.visitedURLs = make(map[uint64]bool)
|
||||
}
|
||||
if s.lock == nil {
|
||||
s.lock = &sync.RWMutex{}
|
||||
}
|
||||
if s.jar == nil {
|
||||
var err error
|
||||
s.jar, err = cookiejar.New(nil)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Visited implements Storage.Visited()
|
||||
func (s *InMemoryStorage) Visited(requestID uint64) error {
|
||||
s.lock.Lock()
|
||||
s.visitedURLs[requestID] = true
|
||||
s.lock.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsVisited implements Storage.IsVisited()
|
||||
func (s *InMemoryStorage) IsVisited(requestID uint64) (bool, error) {
|
||||
s.lock.RLock()
|
||||
visited := s.visitedURLs[requestID]
|
||||
s.lock.RUnlock()
|
||||
return visited, nil
|
||||
}
|
||||
|
||||
// Cookies implements Storage.Cookies()
|
||||
func (s *InMemoryStorage) Cookies(u *url.URL) string {
|
||||
return StringifyCookies(s.jar.Cookies(u))
|
||||
}
|
||||
|
||||
// SetCookies implements Storage.SetCookies()
|
||||
func (s *InMemoryStorage) SetCookies(u *url.URL, cookies string) {
|
||||
s.jar.SetCookies(u, UnstringifyCookies(cookies))
|
||||
}
|
||||
|
||||
// Close implements Storage.Close()
|
||||
func (s *InMemoryStorage) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// StringifyCookies serializes list of http.Cookies to string
|
||||
func StringifyCookies(cookies []*http.Cookie) string {
|
||||
// Stringify cookies.
|
||||
cs := make([]string, len(cookies))
|
||||
for i, c := range cookies {
|
||||
cs[i] = c.String()
|
||||
}
|
||||
return strings.Join(cs, "\n")
|
||||
}
|
||||
|
||||
// UnstringifyCookies deserializes a cookie string to http.Cookies
|
||||
func UnstringifyCookies(s string) []*http.Cookie {
|
||||
h := http.Header{}
|
||||
for _, c := range strings.Split(s, "\n") {
|
||||
h.Add("Set-Cookie", c)
|
||||
}
|
||||
r := http.Response{Header: h}
|
||||
return r.Cookies()
|
||||
}
|
||||
|
||||
// ContainsCookie checks if a cookie name is represented in cookies
|
||||
func ContainsCookie(cookies []*http.Cookie, name string) bool {
|
||||
for _, c := range cookies {
|
||||
if c.Name == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Unmarshal is a shorthand for colly.UnmarshalHTML
|
||||
func (h *HTMLElement) Unmarshal(v interface{}) error {
|
||||
return UnmarshalHTML(v, h.DOM)
|
||||
}
|
||||
|
||||
// UnmarshalHTML declaratively extracts text or attributes to a struct from
|
||||
// HTML response using struct tags composed of css selectors.
|
||||
// Allowed struct tags:
|
||||
// - "selector" (required): CSS (goquery) selector of the desired data
|
||||
// - "attr" (optional): Selects the matching element's attribute's value.
|
||||
// Leave it blank or omit to get the text of the element.
|
||||
//
|
||||
// Example struct declaration:
|
||||
//
|
||||
// type Nested struct {
|
||||
// String string `selector:"div > p"`
|
||||
// Classes []string `selector:"li" attr:"class"`
|
||||
// Struct *Nested `selector:"div > div"`
|
||||
// }
|
||||
//
|
||||
// Supported types: struct, *struct, string, []string
|
||||
func UnmarshalHTML(v interface{}, s *goquery.Selection) error {
|
||||
rv := reflect.ValueOf(v)
|
||||
|
||||
if rv.Kind() != reflect.Ptr || rv.IsNil() {
|
||||
return errors.New("Invalid type or nil-pointer")
|
||||
}
|
||||
|
||||
sv := rv.Elem()
|
||||
st := reflect.TypeOf(v).Elem()
|
||||
|
||||
for i := 0; i < sv.NumField(); i++ {
|
||||
attrV := sv.Field(i)
|
||||
if !attrV.CanAddr() || !attrV.CanSet() {
|
||||
continue
|
||||
}
|
||||
if err := unmarshalAttr(s, attrV, st.Field(i)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalAttr(s *goquery.Selection, attrV reflect.Value, attrT reflect.StructField) error {
|
||||
selector := attrT.Tag.Get("selector")
|
||||
//selector is "-" specify that field should ignore.
|
||||
if selector == "-" {
|
||||
return nil
|
||||
}
|
||||
htmlAttr := attrT.Tag.Get("attr")
|
||||
// TODO support more types
|
||||
switch attrV.Kind() {
|
||||
case reflect.Slice:
|
||||
if err := unmarshalSlice(s, selector, htmlAttr, attrV); err != nil {
|
||||
return err
|
||||
}
|
||||
case reflect.String:
|
||||
val := getDOMValue(s.Find(selector), htmlAttr)
|
||||
attrV.Set(reflect.Indirect(reflect.ValueOf(val)))
|
||||
case reflect.Struct:
|
||||
if err := unmarshalStruct(s, selector, attrV); err != nil {
|
||||
return err
|
||||
}
|
||||
case reflect.Ptr:
|
||||
if err := unmarshalPtr(s, selector, attrV); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return errors.New("Invalid type: " + attrV.String())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalStruct(s *goquery.Selection, selector string, attrV reflect.Value) error {
|
||||
newS := s
|
||||
if selector != "" {
|
||||
newS = newS.Find(selector)
|
||||
}
|
||||
if newS.Nodes == nil {
|
||||
return nil
|
||||
}
|
||||
v := reflect.New(attrV.Type())
|
||||
err := UnmarshalHTML(v.Interface(), newS)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
attrV.Set(reflect.Indirect(v))
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalPtr(s *goquery.Selection, selector string, attrV reflect.Value) error {
|
||||
newS := s
|
||||
if selector != "" {
|
||||
newS = newS.Find(selector)
|
||||
}
|
||||
if newS.Nodes == nil {
|
||||
return nil
|
||||
}
|
||||
e := attrV.Type().Elem()
|
||||
if e.Kind() != reflect.Struct {
|
||||
return errors.New("Invalid slice type")
|
||||
}
|
||||
v := reflect.New(e)
|
||||
err := UnmarshalHTML(v.Interface(), newS)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
attrV.Set(v)
|
||||
return nil
|
||||
}
|
||||
|
||||
func unmarshalSlice(s *goquery.Selection, selector, htmlAttr string, attrV reflect.Value) error {
|
||||
if attrV.Pointer() == 0 {
|
||||
v := reflect.MakeSlice(attrV.Type(), 0, 0)
|
||||
attrV.Set(v)
|
||||
}
|
||||
switch attrV.Type().Elem().Kind() {
|
||||
case reflect.String:
|
||||
s.Find(selector).Each(func(_ int, s *goquery.Selection) {
|
||||
val := getDOMValue(s, htmlAttr)
|
||||
attrV.Set(reflect.Append(attrV, reflect.Indirect(reflect.ValueOf(val))))
|
||||
})
|
||||
case reflect.Ptr:
|
||||
s.Find(selector).Each(func(_ int, innerSel *goquery.Selection) {
|
||||
someVal := reflect.New(attrV.Type().Elem().Elem())
|
||||
UnmarshalHTML(someVal.Interface(), innerSel)
|
||||
attrV.Set(reflect.Append(attrV, someVal))
|
||||
})
|
||||
case reflect.Struct:
|
||||
s.Find(selector).Each(func(_ int, innerSel *goquery.Selection) {
|
||||
someVal := reflect.New(attrV.Type().Elem())
|
||||
UnmarshalHTML(someVal.Interface(), innerSel)
|
||||
attrV.Set(reflect.Append(attrV, reflect.Indirect(someVal)))
|
||||
})
|
||||
default:
|
||||
return errors.New("Invalid slice type")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDOMValue(s *goquery.Selection, attr string) string {
|
||||
if attr == "" {
|
||||
return strings.TrimSpace(s.First().Text())
|
||||
}
|
||||
attrV, _ := s.Attr(attr)
|
||||
return attrV
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
// Copyright 2018 Adam Tauber
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package colly
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
|
||||
"github.com/antchfx/htmlquery"
|
||||
"github.com/antchfx/xmlquery"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// XMLElement is the representation of a XML tag.
|
||||
type XMLElement struct {
|
||||
// Name is the name of the tag
|
||||
Name string
|
||||
Text string
|
||||
attributes interface{}
|
||||
// Request is the request object of the element's HTML document
|
||||
Request *Request
|
||||
// Response is the Response object of the element's HTML document
|
||||
Response *Response
|
||||
// DOM is the DOM object of the page. DOM is relative
|
||||
// to the current XMLElement and is either a html.Node or xmlquery.Node
|
||||
// based on how the XMLElement was created.
|
||||
DOM interface{}
|
||||
isHTML bool
|
||||
}
|
||||
|
||||
// NewXMLElementFromHTMLNode creates a XMLElement from a html.Node.
|
||||
func NewXMLElementFromHTMLNode(resp *Response, s *html.Node) *XMLElement {
|
||||
return &XMLElement{
|
||||
Name: s.Data,
|
||||
Request: resp.Request,
|
||||
Response: resp,
|
||||
Text: htmlquery.InnerText(s),
|
||||
DOM: s,
|
||||
attributes: s.Attr,
|
||||
isHTML: true,
|
||||
}
|
||||
}
|
||||
|
||||
// NewXMLElementFromXMLNode creates a XMLElement from a xmlquery.Node.
|
||||
func NewXMLElementFromXMLNode(resp *Response, s *xmlquery.Node) *XMLElement {
|
||||
return &XMLElement{
|
||||
Name: s.Data,
|
||||
Request: resp.Request,
|
||||
Response: resp,
|
||||
Text: s.InnerText(),
|
||||
DOM: s,
|
||||
attributes: s.Attr,
|
||||
isHTML: false,
|
||||
}
|
||||
}
|
||||
|
||||
// Attr returns the selected attribute of a HTMLElement or empty string
|
||||
// if no attribute found
|
||||
func (h *XMLElement) Attr(k string) string {
|
||||
if h.isHTML {
|
||||
for _, a := range h.attributes.([]html.Attribute) {
|
||||
if a.Key == k {
|
||||
return a.Val
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, a := range h.attributes.([]xml.Attr) {
|
||||
if a.Name.Local == k {
|
||||
return a.Value
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// ChildText returns the concatenated and stripped text content of the matching
|
||||
// elements.
|
||||
func (h *XMLElement) ChildText(xpathQuery string) string {
|
||||
if h.isHTML {
|
||||
child := htmlquery.FindOne(h.DOM.(*html.Node), xpathQuery)
|
||||
if child == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(htmlquery.InnerText(child))
|
||||
}
|
||||
child := xmlquery.FindOne(h.DOM.(*xmlquery.Node), xpathQuery)
|
||||
if child == nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(child.InnerText())
|
||||
|
||||
}
|
||||
|
||||
// ChildAttr returns the stripped text content of the first matching
|
||||
// element's attribute.
|
||||
func (h *XMLElement) ChildAttr(xpathQuery, attrName string) string {
|
||||
if h.isHTML {
|
||||
child := htmlquery.FindOne(h.DOM.(*html.Node), xpathQuery)
|
||||
if child != nil {
|
||||
for _, attr := range child.Attr {
|
||||
if attr.Key == attrName {
|
||||
return strings.TrimSpace(attr.Val)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
child := xmlquery.FindOne(h.DOM.(*xmlquery.Node), xpathQuery)
|
||||
if child != nil {
|
||||
for _, attr := range child.Attr {
|
||||
if attr.Name.Local == attrName {
|
||||
return strings.TrimSpace(attr.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// ChildAttrs returns the stripped text content of all the matching
|
||||
// element's attributes.
|
||||
func (h *XMLElement) ChildAttrs(xpathQuery, attrName string) []string {
|
||||
var res []string
|
||||
if h.isHTML {
|
||||
for _, child := range htmlquery.Find(h.DOM.(*html.Node), xpathQuery) {
|
||||
for _, attr := range child.Attr {
|
||||
if attr.Key == attrName {
|
||||
res = append(res, strings.TrimSpace(attr.Val))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
xmlquery.FindEach(h.DOM.(*xmlquery.Node), xpathQuery, func(i int, child *xmlquery.Node) {
|
||||
for _, attr := range child.Attr {
|
||||
if attr.Name.Local == attrName {
|
||||
res = append(res, strings.TrimSpace(attr.Value))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// ChildTexts returns an array of strings corresponding to child elements that match the xpath query.
|
||||
// Each item in the array is the stripped text content of the corresponding matching child element.
|
||||
func (h *XMLElement) ChildTexts(xpathQuery string) []string {
|
||||
texts := make([]string, 0)
|
||||
if h.isHTML {
|
||||
for _, child := range htmlquery.Find(h.DOM.(*html.Node), xpathQuery) {
|
||||
texts = append(texts, strings.TrimSpace(htmlquery.InnerText(child)))
|
||||
}
|
||||
} else {
|
||||
xmlquery.FindEach(h.DOM.(*xmlquery.Node), xpathQuery, func(i int, child *xmlquery.Node) {
|
||||
texts = append(texts, strings.TrimSpace(child.InnerText()))
|
||||
})
|
||||
}
|
||||
return texts
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue