in order to get what is required and what is not.
This commit is contained in:
Adrien Delorme 2019-05-28 15:24:58 +02:00
parent 2620e18247
commit 31b66a63b1
2 changed files with 43 additions and 0 deletions

1
.gitignore vendored
View File

@ -26,3 +26,4 @@ packer-test*.log
Thumbs.db
/packer.exe
.project
cache

View File

@ -0,0 +1,42 @@
package main
import (
	"fmt"
	"log"
	"strings"

	"github.com/gocolly/colly"
)
// Crawl configuration used by main.
const (
// DocsUrl is the page the crawl starts from; main passes it to the
// collector's Visit method.
// NOTE(review): Go naming convention would spell this DocsURL; the name is
// left as-is because main refers to it.
DocsUrl = "https://www.packer.io/docs/"
// CacheDir is the value main assigns to the collector's CacheDir field.
// Presumably colly stores fetched pages here so repeated runs do not hit the
// network again — confirm against the colly documentation.
CacheDir = "cache/"
)
// builderName derives a builder name from the URL path of a docs page: the
// part of the path that follows "/builders/", with a trailing ".html"
// removed.
//
// When the path contains no "/builders/" segment (for example the index page
// "/docs/builders"), the whole path minus ".html" is returned instead, so the
// caller still gets an identifiable value rather than a mis-sliced string or
// an out-of-range panic.
func builderName(path string) string {
	const marker = "/builders/"
	if i := strings.Index(path, marker); i >= 0 {
		path = path[i+len(marker):]
	}
	return strings.TrimSuffix(path, ".html")
}

// main crawls the Packer documentation starting at DocsUrl, follows every
// link whose href starts with "/docs/builders", and prints one line per
// named anchor found in the list that immediately follows the element with
// id "required-". Each line carries the anchor name, the builder the page
// belongs to, and the text of the anchor's parent element.
//
// Crawl errors are reported through the log package; a failure to start the
// crawl terminates the program with a non-zero exit status.
func main() {
	c := colly.NewCollector()
	c.CacheDir = CacheDir

	// Find and visit all builder doc pages.
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		href := e.Attr("href")
		if !strings.HasPrefix(href, "/docs/builders") {
			return
		}
		// Pages link to each other heavily, so "already visited" is the
		// expected outcome for most links and is not worth reporting.
		if err := e.Request.Visit(href); err != nil && err != colly.ErrAlreadyVisited {
			log.Printf("visiting %q: %v", href, err)
		}
	})

	// Named anchors inside the <ul> that is the adjacent sibling of the
	// element with id "required-".
	c.OnHTML("#required- + ul a[name]", func(e *colly.HTMLElement) {
		builder := builderName(e.Request.URL.Path)

		// Use the text of the anchor's parent element, flattened onto a
		// single line.
		text := e.DOM.Parent().Text()
		text = strings.ReplaceAll(text, "\n", "")
		text = strings.TrimSpace(text)

		fmt.Printf("required: %25s builder: %20s text: %s\n", e.Attr("name"), builder, text)
	})

	if err := c.Visit(DocsUrl); err != nil {
		log.Fatalf("visiting %s: %v", DocsUrl, err)
	}
}