本站源代码
Du kannst nicht mehr als 25 Themen auswählen. Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

194 Zeilen
4.2KB

  1. // Package shlex provides a simple lexical analysis like Unix shell.
  2. package shlex
  3. import (
  4. "bufio"
  5. "errors"
  6. "io"
  7. "strings"
  8. "unicode"
  9. )
  // Sentinel errors reported when the input ends in the middle of a token.
  var (
  	// ErrNoClosing is returned when the input ends while a quotation is
  	// still open.
  	ErrNoClosing = errors.New("No closing quotation")

  	// ErrNoEscaped is returned when the input ends directly after an
  	// escape character, leaving nothing to escape.
  	ErrNoEscaped = errors.New("No escaped character")
  )
  // Tokenizer decides which class a single rune belongs to. The Lexer asks
  // it about every rune it reads and about the rune that represents its
  // current scanning state.
  type Tokenizer interface {
  	// IsWhitespace reports whether r separates tokens.
  	IsWhitespace(r rune) bool
  	// IsWord reports whether r is an ordinary word character.
  	IsWord(r rune) bool
  	// IsQuote reports whether r opens (and closes) a quotation.
  	IsQuote(r rune) bool
  	// IsEscape reports whether r is an escape character.
  	IsEscape(r rune) bool
  	// IsEscapedQuote reports whether escape characters are honoured
  	// inside a quotation opened by the quote rune r.
  	IsEscapedQuote(r rune) bool
  }
  // DefaultTokenizer is the stock rune classifier; its rules resemble those
  // of a Unix shell. It carries no state.
  type DefaultTokenizer struct{}

  // IsWord reports whether r is an underscore, a Unicode letter or a Unicode
  // number.
  func (*DefaultTokenizer) IsWord(r rune) bool {
  	if r == '_' {
  		return true
  	}
  	return unicode.IsLetter(r) || unicode.IsNumber(r)
  }

  // IsQuote reports whether r is a single or a double quotation mark.
  func (*DefaultTokenizer) IsQuote(r rune) bool {
  	return r == '"' || r == '\''
  }

  // IsWhitespace reports whether r is white space as defined by
  // unicode.IsSpace.
  func (*DefaultTokenizer) IsWhitespace(r rune) bool {
  	return unicode.IsSpace(r)
  }

  // IsEscape reports whether r is a backslash.
  func (*DefaultTokenizer) IsEscape(r rune) bool {
  	return r == '\\'
  }

  // IsEscapedQuote reports whether r is a double quotation mark, the only
  // quote inside which this tokenizer allows escapes.
  func (*DefaultTokenizer) IsEscapedQuote(r rune) bool {
  	return r == '"'
  }
  45. // Lexer represents a lexical analyzer.
  46. type Lexer struct {
  47. reader *bufio.Reader
  48. tokenizer Tokenizer
  49. posix bool
  50. whitespacesplit bool
  51. }
  52. // NewLexer creates a new Lexer reading from io.Reader. This Lexer
  53. // has a DefaultTokenizer according to posix and whitespacesplit
  54. // rules.
  55. func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
  56. return &Lexer{
  57. reader: bufio.NewReader(r),
  58. tokenizer: &DefaultTokenizer{},
  59. posix: posix,
  60. whitespacesplit: whitespacesplit,
  61. }
  62. }
  63. // NewLexerString creates a new Lexer reading from a string. This
  64. // Lexer has a DefaultTokenizer according to posix and whitespacesplit
  65. // rules.
  66. func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
  67. return NewLexer(strings.NewReader(s), posix, whitespacesplit)
  68. }
  69. // Split splits a string according to posix or non-posix rules.
  70. func Split(s string, posix bool) ([]string, error) {
  71. return NewLexerString(s, posix, true).Split()
  72. }
  73. // SetTokenizer sets a Tokenizer.
  74. func (l *Lexer) SetTokenizer(t Tokenizer) {
  75. l.tokenizer = t
  76. }
  77. func (l *Lexer) Split() ([]string, error) {
  78. result := make([]string, 0)
  79. for {
  80. token, err := l.readToken()
  81. if token != "" {
  82. result = append(result, token)
  83. }
  84. if err == io.EOF {
  85. break
  86. } else if err != nil {
  87. return result, err
  88. }
  89. }
  90. return result, nil
  91. }
  92. func (l *Lexer) readToken() (string, error) {
  93. t := l.tokenizer
  94. token := ""
  95. quoted := false
  96. state := ' '
  97. escapedstate := ' '
  98. scanning:
  99. for {
  100. next, _, err := l.reader.ReadRune()
  101. if err != nil {
  102. if t.IsQuote(state) {
  103. return token, ErrNoClosing
  104. } else if t.IsEscape(state) {
  105. return token, ErrNoEscaped
  106. }
  107. return token, err
  108. }
  109. switch {
  110. case t.IsWhitespace(state):
  111. switch {
  112. case t.IsWhitespace(next):
  113. break scanning
  114. case l.posix && t.IsEscape(next):
  115. escapedstate = 'a'
  116. state = next
  117. case t.IsWord(next):
  118. token += string(next)
  119. state = 'a'
  120. case t.IsQuote(next):
  121. if !l.posix {
  122. token += string(next)
  123. }
  124. state = next
  125. default:
  126. token = string(next)
  127. if l.whitespacesplit {
  128. state = 'a'
  129. } else if token != "" || (l.posix && quoted) {
  130. break scanning
  131. }
  132. }
  133. case t.IsQuote(state):
  134. quoted = true
  135. switch {
  136. case next == state:
  137. if !l.posix {
  138. token += string(next)
  139. break scanning
  140. } else {
  141. state = 'a'
  142. }
  143. case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
  144. escapedstate = state
  145. state = next
  146. default:
  147. token += string(next)
  148. }
  149. case t.IsEscape(state):
  150. if t.IsQuote(escapedstate) && next != state && next != escapedstate {
  151. token += string(state)
  152. }
  153. token += string(next)
  154. state = escapedstate
  155. case t.IsWord(state):
  156. switch {
  157. case t.IsWhitespace(next):
  158. if token != "" || (l.posix && quoted) {
  159. break scanning
  160. }
  161. case l.posix && t.IsQuote(next):
  162. state = next
  163. case l.posix && t.IsEscape(next):
  164. escapedstate = 'a'
  165. state = next
  166. case t.IsWord(next) || t.IsQuote(next):
  167. token += string(next)
  168. default:
  169. if l.whitespacesplit {
  170. token += string(next)
  171. } else if token != "" {
  172. l.reader.UnreadRune()
  173. break scanning
  174. }
  175. }
  176. }
  177. }
  178. return token, nil
  179. }
上海开阖软件有限公司 沪ICP备12045867号-1