Scrape the builder docs from https://www.packer.io/docs/
in order to determine which builder options are required and which are not.
This commit is contained in:
parent
2620e18247
commit
31b66a63b1
1
.gitignore
vendored
1
.gitignore
vendored
@ -26,3 +26,4 @@ packer-test*.log
|
|||||||
Thumbs.db
|
Thumbs.db
|
||||||
/packer.exe
|
/packer.exe
|
||||||
.project
|
.project
|
||||||
|
cache
|
||||||
|
42
cmd/doc-required-scraper/main.go
Normal file
42
cmd/doc-required-scraper/main.go
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
	"fmt"
	"log"
	"strings"

	"github.com/gocolly/colly"
)
|
||||||
|
|
||||||
|
// DocsUrl is the documentation root page where the crawl starts.
const DocsUrl = "https://www.packer.io/docs/"

// CacheDir is the directory assigned to the collector as its cache location.
const CacheDir = "cache/"
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
c := colly.NewCollector()
|
||||||
|
|
||||||
|
// Find and visit all doc pages
|
||||||
|
c.OnHTML("a[href]", func(e *colly.HTMLElement) {
|
||||||
|
url := e.Attr("href")
|
||||||
|
if !strings.HasPrefix(url, "/docs/builders") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
e.Request.Visit(url)
|
||||||
|
})
|
||||||
|
|
||||||
|
c.OnHTML("#required- + ul a[name]", func(e *colly.HTMLElement) {
|
||||||
|
|
||||||
|
builder := e.Request.URL.Path[strings.Index(e.Request.URL.Path, "/builders/")+len("/builders/"):]
|
||||||
|
builder = strings.TrimSuffix(builder, ".html")
|
||||||
|
|
||||||
|
text := e.DOM.Parent().Text()
|
||||||
|
text = strings.ReplaceAll(text, "\n", "")
|
||||||
|
text = strings.TrimSpace(text)
|
||||||
|
|
||||||
|
fmt.Printf("required: %25s builder: %20s text: %s\n", e.Attr("name"), builder, text)
|
||||||
|
})
|
||||||
|
|
||||||
|
c.CacheDir = CacheDir
|
||||||
|
|
||||||
|
c.Visit(DocsUrl)
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user