本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

274 lines
4.4KB

  1. package lexer
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/gobwas/glob/util/runes"
  6. "unicode/utf8"
  7. )
// Metacharacters of the glob pattern syntax, as recognized by the lexer.
// ',' , '!' and '-' are contextual: they are special only inside
// '{...}' groups or '[...]' ranges respectively (see fetchItem/fetchRange).
const (
	char_any           = '*'  // lexed as Any, or Super when doubled ("**")
	char_comma         = ','  // Separator between '{...}' alternatives
	char_single        = '?'  // lexed as Single
	char_escape        = '\\' // escapes the following rune in text
	char_range_open    = '['  // RangeOpen
	char_range_close   = ']'  // RangeClose
	char_terms_open    = '{'  // TermsOpen
	char_terms_close   = '}'  // TermsClose
	char_range_not     = '!'  // Not, when first inside '[...]'
	char_range_between = '-'  // RangeBetween, as in "[a-z]"
)
// specials lists the bytes Special reports as glob metacharacters.
// The contextual characters ',', '!' and '-' are intentionally absent:
// they are only special inside groups/ranges.
var specials = []byte{
	char_any,
	char_single,
	char_escape,
	char_range_open,
	char_range_close,
	char_terms_open,
	char_terms_close,
}
  29. func Special(c byte) bool {
  30. return bytes.IndexByte(specials, c) != -1
  31. }
  32. type tokens []Token
  33. func (i *tokens) shift() (ret Token) {
  34. ret = (*i)[0]
  35. copy(*i, (*i)[1:])
  36. *i = (*i)[:len(*i)-1]
  37. return
  38. }
  39. func (i *tokens) push(v Token) {
  40. *i = append(*i, v)
  41. }
  42. func (i *tokens) empty() bool {
  43. return len(*i) == 0
  44. }
// eof is the sentinel rune returned by peek/read once the input is
// exhausted (or after a decode error).
var eof rune = 0

// lexer scans a glob pattern string and produces Tokens on demand via Next.
type lexer struct {
	data       string // pattern source being scanned
	pos        int    // byte offset of the next rune in data
	err        error  // first error encountered; sticky once set
	tokens     tokens // queue of tokens produced but not yet returned
	termsLevel int    // nesting depth of '{' ... '}' groups

	// One-rune pushback buffer used by read/unread.
	lastRune     rune // most recently read rune
	lastRuneSize int  // byte width of lastRune
	hasRune      bool // lastRune has been pushed back by unread
}
  56. func NewLexer(source string) *lexer {
  57. l := &lexer{
  58. data: source,
  59. tokens: tokens(make([]Token, 0, 4)),
  60. }
  61. return l
  62. }
  63. func (l *lexer) Next() Token {
  64. if l.err != nil {
  65. return Token{Error, l.err.Error()}
  66. }
  67. if !l.tokens.empty() {
  68. return l.tokens.shift()
  69. }
  70. l.fetchItem()
  71. return l.Next()
  72. }
  73. func (l *lexer) peek() (r rune, w int) {
  74. if l.pos == len(l.data) {
  75. return eof, 0
  76. }
  77. r, w = utf8.DecodeRuneInString(l.data[l.pos:])
  78. if r == utf8.RuneError {
  79. l.errorf("could not read rune")
  80. r = eof
  81. w = 0
  82. }
  83. return
  84. }
  85. func (l *lexer) read() rune {
  86. if l.hasRune {
  87. l.hasRune = false
  88. l.seek(l.lastRuneSize)
  89. return l.lastRune
  90. }
  91. r, s := l.peek()
  92. l.seek(s)
  93. l.lastRune = r
  94. l.lastRuneSize = s
  95. return r
  96. }
// seek advances the read position by w bytes (negative w rewinds).
func (l *lexer) seek(w int) {
	l.pos += w
}
  100. func (l *lexer) unread() {
  101. if l.hasRune {
  102. l.errorf("could not unread rune")
  103. return
  104. }
  105. l.seek(-l.lastRuneSize)
  106. l.hasRune = true
  107. }
// errorf records a formatted lexing error; once set, Next keeps
// returning it as an Error token.
func (l *lexer) errorf(f string, v ...interface{}) {
	l.err = fmt.Errorf(f, v...)
}
// inTerms reports whether the lexer is currently inside at least one
// '{' ... '}' group.
func (l *lexer) inTerms() bool {
	return l.termsLevel > 0
}

// termsEnter records entry into a '{' ... '}' group.
func (l *lexer) termsEnter() {
	l.termsLevel++
}

// termsLeave records exit from the innermost '{' ... '}' group.
func (l *lexer) termsLeave() {
	l.termsLevel--
}
  120. var inTextBreakers = []rune{char_single, char_any, char_range_open, char_terms_open}
  121. var inTermsBreakers = append(inTextBreakers, char_terms_close, char_comma)
// fetchItem lexes the next item from the input and pushes the resulting
// token(s) onto l.tokens. Called only from Next when the queue is empty.
func (l *lexer) fetchItem() {
	r := l.read()
	switch {
	case r == eof:
		l.tokens.push(Token{EOF, ""})
	case r == char_terms_open:
		// '{' opens a terms group; track nesting so that ',' and '}'
		// are only treated specially while inside one.
		l.termsEnter()
		l.tokens.push(Token{TermsOpen, string(r)})
	case r == char_comma && l.inTerms():
		l.tokens.push(Token{Separator, string(r)})
	case r == char_terms_close && l.inTerms():
		l.tokens.push(Token{TermsClose, string(r)})
		l.termsLeave()
	case r == char_range_open:
		// '[' starts a character range; hand off to the range scanner.
		l.tokens.push(Token{RangeOpen, string(r)})
		l.fetchRange()
	case r == char_single:
		l.tokens.push(Token{Single, string(r)})
	case r == char_any:
		// "**" lexes as a single Super token; a lone '*' as Any.
		if l.read() == char_any {
			l.tokens.push(Token{Super, string(r) + string(r)})
		} else {
			l.unread()
			l.tokens.push(Token{Any, string(r)})
		}
	default:
		// Ordinary character: push it back and scan a text run up to
		// the next breaker for the current context.
		l.unread()
		var breakers []rune
		if l.inTerms() {
			breakers = inTermsBreakers
		} else {
			breakers = inTextBreakers
		}
		l.fetchText(breakers)
	}
}
// fetchRange lexes the body of a character range after '[' has already
// been consumed, pushing Not / RangeLo / RangeBetween / RangeHi /
// RangeClose (or Text for list-style ranges) until the closing ']' is
// reached. Hitting end of input or a malformed range records an error.
func (l *lexer) fetchRange() {
	var wantHi bool    // next rune must be the high bound of a "lo-hi" pair
	var wantClose bool // next rune must be the closing ']'
	var seenNot bool   // '!' already consumed; only honored as the first rune
	for {
		r := l.read()
		if r == eof {
			l.errorf("unexpected end of input")
			return
		}
		if wantClose {
			if r != char_range_close {
				l.errorf("expected close range character")
			} else {
				l.tokens.push(Token{RangeClose, string(r)})
			}
			return
		}
		if wantHi {
			l.tokens.push(Token{RangeHi, string(r)})
			wantClose = true
			continue
		}
		if !seenNot && r == char_range_not {
			l.tokens.push(Token{Not, string(r)})
			seenNot = true
			continue
		}
		// "x-y" form: r is the low bound when a '-' immediately follows.
		if n, w := l.peek(); n == char_range_between {
			l.seek(w)
			l.tokens.push(Token{RangeLo, string(r)})
			l.tokens.push(Token{RangeBetween, string(n)})
			wantHi = true
			continue
		}
		l.unread() // unread first peek and fetch as text
		l.fetchText([]rune{char_range_close})
		wantClose = true
	}
}
  198. func (l *lexer) fetchText(breakers []rune) {
  199. var data []rune
  200. var escaped bool
  201. reading:
  202. for {
  203. r := l.read()
  204. if r == eof {
  205. break
  206. }
  207. if !escaped {
  208. if r == char_escape {
  209. escaped = true
  210. continue
  211. }
  212. if runes.IndexRune(breakers, r) != -1 {
  213. l.unread()
  214. break reading
  215. }
  216. }
  217. escaped = false
  218. data = append(data, r)
  219. }
  220. if len(data) > 0 {
  221. l.tokens.push(Token{Text, string(data)})
  222. }
  223. }
上海开阖软件有限公司 沪ICP备12045867号-1