packer-cn/vendor/github.com/rwtodd/Go.Sed/sed/lex.go

442 lines
9.3 KiB
Go

package sed
// the lexer for SED. The point of the lexer is to
// reliably transform the input into a series of token structs.
// These structs know the source location, and the token type, and
// any arguments to the token (e.g., a regexp's '/' argument is the
// regular expression itself).
//
// The lexer also simplifies and regularises the input, for instance
// by eliminating comments.
import (
"bufio"
"bytes"
"fmt"
"io"
"strings"
"unicode"
)
type location struct {
line int
pos int
}
func (l *location) String() string {
return fmt.Sprintf("at line %d, pos %d", l.line, l.pos)
}
const (
tok_NUM = iota
tok_RX
tok_COMMA
tok_BANG
tok_DOLLAR
tok_LBRACE
tok_RBRACE
tok_EOL
tok_CMD
tok_CHANGE
tok_LABEL
)
type token struct {
location
typ int
letter rune
args []string
}
// ----------------------------------------------------------
// Location-tracking reader
// ----------------------------------------------------------
type locReader struct {
location
eol bool // state for end of line, true when last rune was '\n'
r *bufio.Reader
}
func (lr *locReader) ReadRune() (rune, int, error) {
r, i, err := lr.r.ReadRune()
lr.pos++
if lr.eol {
lr.pos = 1
lr.line++
lr.eol = false
}
if r == '\n' {
lr.eol = true
}
return r, i, err
}
func (lr *locReader) UnreadRune() error {
lr.pos--
lr.eol = false
if lr.pos == 0 {
lr.line--
lr.eol = true
}
return lr.r.UnreadRune()
}
func (lr *locReader) ReadLine() (nxtl string, err error) {
var prefix = true
var line []byte
var lines []string
for prefix {
line, prefix, err = lr.r.ReadLine()
if err != nil {
break
}
buf := make([]byte, len(line))
copy(buf, line)
lines = append(lines, string(buf))
}
nxtl = strings.Join(lines, "")
// fixup our position information
lr.pos += len(nxtl)
lr.eol = true
return
}
// ----------------------------------------------------------
// lexer functions
// ----------------------------------------------------------
func skipComment(r *locReader) (rune, error) {
var err error
var cur rune = ' '
for (cur != '\n') && (err == nil) {
cur, _, err = r.ReadRune()
}
return ';', err
}
func skipWS(r *locReader) (rune, error) {
var err error
var cur rune = ' '
for {
switch {
case cur == '\n':
return ';', err
case cur == '#':
return skipComment(r)
case !unicode.IsSpace(cur):
return cur, err
}
cur, _, err = r.ReadRune()
}
}
func readNumber(r *locReader, character rune) (string, error) {
var buffer bytes.Buffer
var err error
for (err == nil) && unicode.IsDigit(character) {
buffer.WriteRune(character)
character, _, err = r.ReadRune()
}
if err == nil {
err = r.UnreadRune()
}
return buffer.String(), err
}
// readDelimited reads until it finds the delimter character,
// returning the string (not including the delimiter). It does
// allow the delimiter to be escaped by a backslash ('\').
// It is an error to reach EOL while looking for the delimiter.
func readDelimited(r *locReader, delimiter rune) (string, error) {
var buffer bytes.Buffer
var err error
var character rune
var previous rune
character, _, err = r.ReadRune()
for (err == nil) &&
(character != '\n') &&
((character != delimiter) || (previous == '\\')) {
buffer.WriteRune(character)
previous = character
character, _, err = r.ReadRune()
}
if character == '\n' {
err = fmt.Errorf("end-of-line while looking for %c", delimiter)
}
if err == io.EOF {
err = fmt.Errorf("end-of-file while looking for %c", delimiter)
}
return buffer.String(), err
}
// readReplacement reads until it finds the delimter character,
// returning the string (not including the delimiter). It does
// allow the delimiter to be escaped by a backslash ('\'), and it
// does interpret a few common backslash escapes like \n and \t.
// It is an error to reach an unescaped EOL while looking for the delimiter.
func readReplacement(r *locReader, delimiter rune) (string, error) {
var buffer bytes.Buffer
var err error
var character rune
var previous rune
character, _, err = r.ReadRune()
for err == nil {
if character == '\r' {
character, _, err = r.ReadRune()
continue
}
if previous == '\\' {
// find out what we escaped...
switch character {
case 'r':
buffer.WriteRune('\r')
case 't':
buffer.WriteRune('\t')
case 'n':
buffer.WriteRune('\n')
case '\\':
buffer.WriteRune(character)
character = ' ' // don't escape the next one
default:
buffer.WriteRune(character)
}
} else {
if character == delimiter ||
character == '\n' {
break
} else if character != '\\' {
buffer.WriteRune(character)
}
}
previous = character
character, _, err = r.ReadRune()
}
if character == '\n' {
err = fmt.Errorf("end-of-line while looking for %c", delimiter)
}
if err == io.EOF {
err = fmt.Errorf("end-of-file while looking for %c", delimiter)
}
return buffer.String(), err
}
// readMultiLine reads until it finds an unescaped newline. It discards the
// first line, if it is empty, because commands like "c\", "a\" and "i\" are
// intended to be used that way.
func readMultiLine(r *locReader) (string, error) {
var lines []string
var err error
first := true
hasSlash := true // does the line end in a slash?
for hasSlash {
txt, err := r.ReadLine()
if err != nil {
break
}
tlen := len(txt)
// strip off the final '\', if there is one
if tlen > 0 && txt[tlen-1] == '\\' {
txt = txt[:tlen-1]
} else {
hasSlash = false
}
// If it's empty and the first line, forget it.
// Otherwise, add it to the line list
if !first || tlen > 1 {
lines = append(lines, txt)
}
first = false
}
// for sed's purposes, we want a final newline...
lines = append(lines, "")
return strings.Join(lines, "\n"), err
}
// readIdentifier skips any whitespace, and then reads until it
// finds either a ';' or a non-alphanumeric character. It
// returns the string it reads.
func readIdentifier(r *locReader) (string, error) {
var buffer bytes.Buffer
var err error
var character rune
character, err = skipWS(r)
for (err == nil) && (character != ';') && !unicode.IsSpace(character) {
buffer.WriteRune(character)
character, _, err = r.ReadRune()
}
if err == nil {
err = r.UnreadRune()
}
return buffer.String(), err
}
func readSubstitution(r *locReader) ([]string, error) {
var ans = make([]string, 3)
var err error
// step 1.: get the delimiter character for substitutions
var delimiter rune
delimiter, _, err = r.ReadRune()
if err != nil {
return ans, err
}
// step 2.: read the regexp
ans[0], err = readDelimited(r, delimiter)
if err != nil {
return ans, err
}
// step 3.: read the replacement
ans[1], err = readReplacement(r, delimiter)
if err != nil {
return ans, err
}
// step 4.: read the modifiers
ans[2], err = readIdentifier(r)
return ans, err
}
func readTranslation(r *locReader) ([]string, error) {
var ans = make([]string, 2)
var err error
// step 1.: get the delimiter character for substitutions
var delimiter rune
delimiter, _, err = r.ReadRune()
if err != nil {
return ans, err
}
// step 2.: read the regexp
ans[0], err = readDelimited(r, delimiter)
if err != nil {
return ans, err
}
// step 3.: read the replacement
ans[1], err = readDelimited(r, delimiter)
if err != nil {
return ans, err
}
return ans, err
}
func lex(r *bufio.Reader, ch chan<- *token, errch chan<- error) {
defer close(ch)
defer close(errch)
rdr := locReader{}
rdr.r = r
rdr.eol = true
var err error
var cur rune
var topLoc = rdr.location
for err == nil {
cur, err = skipWS(&rdr)
if err != nil {
break
}
topLoc = rdr.location // remember the start of the command
switch cur {
case ';':
ch <- &token{topLoc, tok_EOL, cur, nil}
case ',':
ch <- &token{topLoc, tok_COMMA, cur, nil}
case '{':
ch <- &token{topLoc, tok_LBRACE, cur, nil}
case '}':
ch <- &token{topLoc, tok_RBRACE, cur, nil}
case '!':
ch <- &token{topLoc, tok_BANG, cur, nil}
case '/':
var rx string
rx, err = readDelimited(&rdr, '/')
ch <- &token{topLoc, tok_RX, cur, []string{rx}}
case '$':
ch <- &token{topLoc, tok_DOLLAR, cur, nil}
case ':':
var label string
label, err = readIdentifier(&rdr)
ch <- &token{topLoc, tok_LABEL, cur, []string{label}}
case 'b', 't': // branches...
var label string
label, err = readIdentifier(&rdr)
ch <- &token{topLoc, tok_CMD, cur, []string{label}}
case 's': // substitution
var args []string
args, err = readSubstitution(&rdr)
ch <- &token{topLoc, tok_CMD, cur, args}
case 'y': // translation
var args []string
args, err = readTranslation(&rdr)
ch <- &token{topLoc, tok_CMD, cur, args}
case 'c': // change
var txt string
txt, err = readMultiLine(&rdr)
ch <- &token{topLoc, tok_CHANGE, cur, []string{txt}}
case 'i', 'a': // insert or append
var txt string
txt, err = readMultiLine(&rdr)
ch <- &token{topLoc, tok_CMD, cur, []string{txt}}
case 'r', 'w':
var fname string
fname, err = readIdentifier(&rdr)
ch <- &token{topLoc, tok_CMD, cur, []string{fname}}
default:
if unicode.IsDigit(cur) {
var num string
num, err = readNumber(&rdr, cur)
ch <- &token{topLoc, tok_NUM, cur, []string{num}}
} else {
// it's just a argument-free command
ch <- &token{topLoc, tok_CMD, cur, nil}
}
}
}
if err != io.EOF {
errch <- fmt.Errorf("Error reading... <%s> %v", err.Error(), &topLoc)
}
}