// Package camelcase is a micro package to split the words of a camelcase type
// string into a slice of words.
package camelcase
|
|
|
|
import (
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Split splits the camelcase word and returns a list of words. It also
// supports digits. Both lower camel case and upper camel case are supported.
// For more info please check: http://en.wikipedia.org/wiki/CamelCase
//
// Examples
//
//	"" => []
//	"lowercase" => ["lowercase"]
//	"Class" => ["Class"]
//	"MyClass" => ["My", "Class"]
//	"MyC" => ["My", "C"]
//	"HTML" => ["HTML"]
//	"PDFLoader" => ["PDF", "Loader"]
//	"AString" => ["A", "String"]
//	"SimpleXMLParser" => ["Simple", "XML", "Parser"]
//	"vimRPCPlugin" => ["vim", "RPC", "Plugin"]
//	"GL11Version" => ["GL", "11", "Version"]
//	"99Bottles" => ["99", "Bottles"]
//	"May5" => ["May", "5"]
//	"BFG9000" => ["BFG", "9000"]
//	"BöseÜberraschung" => ["Böse", "Überraschung"]
//	"Two spaces" => ["Two", " ", "spaces"]
//	"BadUTF8\xe2\xe2\xa1" => ["BadUTF8\xe2\xe2\xa1"]
//
// Splitting rules
//
//  1. If the string is not valid UTF-8, return it without splitting as a
//     single item slice.
//  2. Assign every unicode character to one of 4 sets: lower case letters,
//     upper case letters, numbers, and all other characters.
//  3. Iterate through the characters of the string, introducing splits
//     between adjacent characters that belong to different sets.
//  4. Iterate through the resulting fields; whenever an upper case field is
//     immediately followed by a lower case field, move the last character of
//     the upper case field to the beginning of the lower case field. A field
//     left empty by this step is dropped from the result.
//
// The returned slice is never nil; the empty string yields an empty slice.
func Split(src string) (entries []string) {
	// Invalid UTF-8 cannot be classified rune by rune, so return it whole.
	if !utf8.ValidString(src) {
		return []string{src}
	}

	// Character classes. classNone is the "nothing seen yet" sentinel, so the
	// first rune always opens a new field.
	const (
		classNone = iota
		classLower
		classUpper
		classDigit
		classOther
	)

	// field is a run of same-class runes, stored as byte offsets into src so
	// that no per-field rune buffers need to be allocated.
	type field struct {
		start, end int // half-open byte range [start, end) within src
		class      int
	}

	// Pass 1: cut src into fields wherever the character class changes.
	var fields []field
	lastClass := classNone
	for i, r := range src {
		var class int
		switch {
		case unicode.IsLower(r):
			class = classLower
		case unicode.IsUpper(r):
			class = classUpper
		case unicode.IsDigit(r):
			class = classDigit
		default:
			class = classOther
		}

		// src is valid UTF-8, so RuneLen is the exact encoded width of r.
		end := i + utf8.RuneLen(r)
		if class == lastClass {
			fields[len(fields)-1].end = end
		} else {
			fields = append(fields, field{start: i, end: end, class: class})
		}
		lastClass = class
	}

	// Pass 2: handle upper case -> lower case sequences, e.g.
	// "PDFL", "oader" -> "PDF", "Loader". Moving the boundary back by one
	// rune hands the last upper case character to the following field.
	for i := 0; i+1 < len(fields); i++ {
		if fields[i].class != classUpper || fields[i+1].class != classLower {
			continue
		}
		_, size := utf8.DecodeLastRuneInString(src[fields[i].start:fields[i].end])
		fields[i].end -= size
		fields[i+1].start -= size
	}

	// Pass 3: materialise the non-empty fields. A single-letter upper case
	// field that donated its only rune in pass 2 is now empty and is skipped.
	entries = make([]string, 0, len(fields))
	for _, f := range fields {
		if f.start < f.end {
			entries = append(entries, src[f.start:f.end])
		}
	}
	return entries
}
|