You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
envsubst/parse/scan.go

295 lines
5.5 KiB
Go

package parse
import (
"unicode"
"unicode/utf8"
)
// eof is the sentinel rune that read returns once the scanner position
// has reached the end of the buffer.
// NOTE(review): the sentinel is rune(0), so a NUL byte decoded from the
// input compares equal to eof - confirm that inputs never contain NUL.
var eof = rune(0)
// token identifies the lexical category of a piece of scanned input.
// It is the result type of scanner.scan.
type token uint
// list of lexical tokens.
const (
// special tokens
// tokenIllegal is returned by scan when none of the enabled rules match.
tokenIllegal token = iota
// tokenEOF is returned by scan when read reports eof.
tokenEOF
// identifiers and literals
// tokenIdent is returned by scan when scanIdent accepts the input.
tokenIdent
// operators and delimiters
// tokenLbrack is returned by scan for the opening "${" sequence.
tokenLbrack
// tokenRbrack is returned by scan for the closing '}' character.
tokenRbrack
// NOTE(review): scan never returns tokenQuote in the code visible here -
// confirm whether it is still used elsewhere.
tokenQuote
)
// predefined mode bits to control recognition of tokens. The bits are
// tested against scanner.mode; the scanner method of the same name
// returns false immediately when its bit is not set.
const (
// scanIdent enables identifier scanning.
scanIdent byte = 1 << iota
// scanLbrack enables recognition of the opening "${" sequence.
scanLbrack
// scanRbrack enables recognition of the closing '}' character.
scanRbrack
// scanEscape enables recognition of escape sequences (see scanEscaped).
scanEscape
)
// predefined mode bits to control escape tokens. The bits are tested
// against scanner.escapeChars by shouldEscape.
const (
// dollar enables the "$$" escape sequence.
dollar byte = 1 << iota
// backslash enables the escape sequences that start with a backslash
// (a backslash followed by '/' or by another backslash).
backslash
// escapeAll enables both escape characters.
escapeAll = dollar | backslash
)
// acceptFunc reports whether rune r is accepted as part of the token
// being scanned. scanIdent passes i as s.pos-s.start measured after r
// has been read, i.e. the byte offset of the end of r relative to the
// start of the token (1 for a single-byte first rune).
type acceptFunc func(r rune, i int) bool
// scanner implements a lexical scanner that reads unicode
// characters and tokens from a string buffer.
type scanner struct {
// buf is the input being scanned; skip rewrites it when it removes a byte.
buf string
// pos is the byte offset in buf of the next rune to be read.
pos int
// start is the byte offset in buf where the current token begins; scan
// sets it before reading.
start int
// width is the byte width of the rune most recently decoded by read
// (0 when read returned eof); unread subtracts it from pos.
width int
// mode is a bit set of the scan* mode bits selecting the enabled rules.
mode byte
// escapeChars is a bit set of dollar/backslash selecting active escapes.
escapeChars byte
// accept decides which runes scanIdent takes; init resets it to nil.
accept acceptFunc
}
// init points the scanner at buf and rewinds it to the beginning of
// that buffer. The accept function is cleared; mode and escapeChars
// are left as they were.
func (s *scanner) init(buf string) {
	s.buf = buf
	s.pos, s.start, s.width = 0, 0, 0
	s.accept = nil
}
// read decodes the rune at the current position, records its byte
// width and advances past it. Once the position has reached the end
// of the buffer it records a width of zero and returns eof.
func (s *scanner) read() rune {
	if s.pos < len(s.buf) {
		ch, size := utf8.DecodeRuneInString(s.buf[s.pos:])
		s.width = size
		s.pos += size
		return ch
	}
	s.width = 0
	return eof
}
// unread moves the position back by the recorded width of the most
// recently decoded rune. After read has returned eof that width is
// zero, so the position does not change.
func (s *scanner) unread() {
	s.pos = s.pos - s.width
}
// skip removes the single byte immediately before the current position
// from the buffer. The position itself is not moved, so the rune that
// followed the removed byte ends up in front of the position.
func (s *scanner) skip() {
	cut := s.pos - 1
	s.buf = s.buf[:cut] + s.buf[cut+1:]
}
// peek returns the next unicode character in the buffer without
// advancing the scanner. It returns eof when the position is already
// at the end of the buffer.
//
// Both the position and the width recorded by the previous read are
// restored. Restoring only the position would leave width describing
// the rune that was merely looked at, and a later unread would then
// rewind by the wrong number of bytes whenever that rune and the rune
// actually consumed have UTF-8 encodings of different lengths.
func (s *scanner) peek() rune {
	pos, width := s.pos, s.width
	r := s.read()
	s.pos, s.width = pos, width
	return r
}
// string returns the slice of the buffer between the start of the
// current token and the current position. It is meaningful after a
// call to scan.
func (s *scanner) string() string {
	from, to := s.start, s.pos
	return s.buf[from:to]
}
// shouldEscape reports whether any bit of character is set in the
// scanner's escapeChars.
func (s *scanner) shouldEscape(character byte) bool {
	enabled := s.escapeChars & character
	return enabled != 0
}
// scan marks the current position as the start of a new token, reads
// one rune and classifies what follows. The rules are tried in a fixed
// order: end of input, opening bracket, closing bracket, identifier.
// tokenIllegal is returned when none of them match.
func (s *scanner) scan() token {
	s.start = s.pos
	ch := s.read()
	if ch == eof {
		return tokenEOF
	}
	if s.scanLbrack(ch) {
		return tokenLbrack
	}
	if s.scanRbrack(ch) {
		return tokenRbrack
	}
	if s.scanIdent(ch) {
		return tokenIdent
	}
	return tokenIllegal
}
// scanIdent consumes an identifier whose first rune r has already been
// read. It returns false when identifier scanning is disabled or when
// r is neither an escape sequence nor accepted by the accept function.
// Otherwise it keeps reading until it reaches the end of the input, an
// opening bracket or a rejected rune, leaves the position just before
// whatever stopped it, and returns true.
func (s *scanner) scanIdent(r rune) bool {
	if s.mode&scanIdent == 0 {
		return false
	}

	// First rune: an escape character is removed from the buffer,
	// anything else has to be accepted.
	switch {
	case s.scanEscaped(r):
		s.skip()
	case !s.accept(r, s.pos-s.start):
		return false
	}

	for {
		next := s.read()
		if next == eof {
			s.unread()
			return true
		}
		if s.scanLbrack(next) {
			// scanLbrack consumed a second rune; step back over both so
			// the bracket is left for the following scan.
			s.unread()
			s.unread()
			return true
		}
		if s.scanEscaped(next) {
			s.skip()
			continue
		}
		if !s.accept(next, s.pos-s.start) {
			s.unread()
			return true
		}
	}
}
// scanLbrack reports whether r, which has already been read, begins
// the opening bracket sequence "${". It returns false when bracket
// scanning is disabled in the scanner mode.
//
// On success the '{' stays consumed. On failure the lookahead rune is
// pushed back and the width recorded for r is restored, so that a later
// unread by the caller rewinds over r itself rather than by the byte
// width of the rune that was only inspected here.
func (s *scanner) scanLbrack(r rune) bool {
	if s.mode&scanLbrack == 0 || r != '$' {
		return false
	}
	width := s.width
	if s.read() == '{' {
		return true
	}
	s.unread()
	s.width = width
	return false
}
// scanRbrack reports whether r is the closing bracket '}'. It always
// returns false when closing-bracket scanning is disabled in the
// scanner mode. No further input is read.
func (s *scanner) scanRbrack(r rune) bool {
	enabled := s.mode&scanRbrack != 0
	return enabled && r == '}'
}
// scanEscaped reports whether r, which has already been read, is an
// escape character that should be dropped. It returns false when
// escape scanning is disabled in the scanner mode. A '$' counts as an
// escape when dollar escaping is enabled and the next rune is also '$';
// a backslash counts when backslash escaping is enabled and the next
// rune is '/' or another backslash. The next rune is only peeked at.
func (s *scanner) scanEscaped(r rune) bool {
	if s.mode&scanEscape == 0 {
		return false
	}
	switch r {
	case '$':
		return s.shouldEscape(dollar) && s.peek() == '$'
	case '\\':
		if !s.shouldEscape(backslash) {
			return false
		}
		next := s.peek()
		return next == '/' || next == '\\'
	}
	return false
}
//
// scanner functions accept or reject runes.
//
// acceptRune accepts every rune at every offset.
func acceptRune(r rune, i int) bool {
return true
}
// acceptIdent accepts unicode letters, unicode digits and the
// underscore, regardless of the offset i.
func acceptIdent(r rune, i int) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
// acceptColon accepts a colon at any offset.
func acceptColon(r rune, i int) bool {
	const colon = ':'
	return r == colon
}
// acceptOneHash accepts a '#' only when the offset i is exactly 1.
func acceptOneHash(r rune, i int) bool {
	return i == 1 && r == '#'
}
// acceptNone rejects every rune at every offset.
func acceptNone(r rune, i int) bool {
return false
}
// acceptNotClosing accepts every rune except the closing bracket '}',
// regardless of the offset i.
func acceptNotClosing(r rune, i int) bool {
	const closing = '}'
	return r != closing
}
// acceptHashFunc accepts a '#' while the offset i is below 3.
func acceptHashFunc(r rune, i int) bool {
	if i >= 3 {
		return false
	}
	return r == '#'
}
// acceptPercentFunc accepts a '%' while the offset i is below 3.
func acceptPercentFunc(r rune, i int) bool {
	if r != '%' {
		return false
	}
	return i < 3
}
// acceptDefaultFunc accepts a ':' at offset 1 and one of '=', '-', '?'
// or '+' at offset 2. Everything else is rejected.
func acceptDefaultFunc(r rune, i int) bool {
	switch i {
	case 1:
		return r == ':'
	case 2:
		switch r {
		case '=', '-', '?', '+':
			return true
		}
	}
	return false
}
// acceptReplaceFunc accepts a '/' at offset 1 and one of '/', '#' or
// '%' at offset 2. Everything else is rejected.
func acceptReplaceFunc(r rune, i int) bool {
	switch i {
	case 1:
		return r == '/'
	case 2:
		switch r {
		case '/', '#', '%':
			return true
		}
	}
	return false
}
// acceptOneEqual accepts a '=' only when the offset i is exactly 1.
func acceptOneEqual(r rune, i int) bool {
	if i != 1 {
		return false
	}
	return r == '='
}
// acceptOneColon accepts a ':' only when the offset i is exactly 1.
func acceptOneColon(r rune, i int) bool {
	if r != ':' {
		return false
	}
	return i == 1
}
// rejectColonClose accepts every rune except ':' and '}', regardless
// of the offset i.
func rejectColonClose(r rune, i int) bool {
	switch r {
	case ':', '}':
		return false
	}
	return true
}
// acceptSlash accepts a forward slash at any offset.
func acceptSlash(r rune, i int) bool {
	const slash = '/'
	return r == slash
}
// acceptNotSlash accepts every rune except a forward slash, regardless
// of the offset i.
func acceptNotSlash(r rune, i int) bool {
	const slash = '/'
	return r != slash
}
// acceptCasingFunc accepts a ',' or a '^' while the offset i is
// below 3.
func acceptCasingFunc(r rune, i int) bool {
	if i >= 3 {
		return false
	}
	switch r {
	case ',', '^':
		return true
	}
	return false
}