本站源代码
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

408 lines
8.9KB

// Copyright 2018 Frank Schroeder. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Parts of the lexer are taken from the text/template/parse package.
// For these parts the following applies:
//
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file of the go 1.2
// distribution.
  12. package properties
  13. import (
  14. "fmt"
  15. "strconv"
  16. "strings"
  17. "unicode/utf8"
  18. )
  19. // item represents a token or text string returned from the scanner.
  20. type item struct {
  21. typ itemType // The type of this item.
  22. pos int // The starting position, in bytes, of this item in the input string.
  23. val string // The value of this item.
  24. }
  25. func (i item) String() string {
  26. switch {
  27. case i.typ == itemEOF:
  28. return "EOF"
  29. case i.typ == itemError:
  30. return i.val
  31. case len(i.val) > 10:
  32. return fmt.Sprintf("%.10q...", i.val)
  33. }
  34. return fmt.Sprintf("%q", i.val)
  35. }
  36. // itemType identifies the type of lex items.
  37. type itemType int
  38. const (
  39. itemError itemType = iota // error occurred; value is text of error
  40. itemEOF
  41. itemKey // a key
  42. itemValue // a value
  43. itemComment // a comment
  44. )
  45. // defines a constant for EOF
  46. const eof = -1
  47. // permitted whitespace characters space, FF and TAB
  48. const whitespace = " \f\t"
  49. // stateFn represents the state of the scanner as a function that returns the next state.
  50. type stateFn func(*lexer) stateFn
  51. // lexer holds the state of the scanner.
  52. type lexer struct {
  53. input string // the string being scanned
  54. state stateFn // the next lexing function to enter
  55. pos int // current position in the input
  56. start int // start position of this item
  57. width int // width of last rune read from input
  58. lastPos int // position of most recent item returned by nextItem
  59. runes []rune // scanned runes for this item
  60. items chan item // channel of scanned items
  61. }
  62. // next returns the next rune in the input.
  63. func (l *lexer) next() rune {
  64. if l.pos >= len(l.input) {
  65. l.width = 0
  66. return eof
  67. }
  68. r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  69. l.width = w
  70. l.pos += l.width
  71. return r
  72. }
  73. // peek returns but does not consume the next rune in the input.
  74. func (l *lexer) peek() rune {
  75. r := l.next()
  76. l.backup()
  77. return r
  78. }
  79. // backup steps back one rune. Can only be called once per call of next.
  80. func (l *lexer) backup() {
  81. l.pos -= l.width
  82. }
  83. // emit passes an item back to the client.
  84. func (l *lexer) emit(t itemType) {
  85. i := item{t, l.start, string(l.runes)}
  86. l.items <- i
  87. l.start = l.pos
  88. l.runes = l.runes[:0]
  89. }
  90. // ignore skips over the pending input before this point.
  91. func (l *lexer) ignore() {
  92. l.start = l.pos
  93. }
  94. // appends the rune to the current value
  95. func (l *lexer) appendRune(r rune) {
  96. l.runes = append(l.runes, r)
  97. }
  98. // accept consumes the next rune if it's from the valid set.
  99. func (l *lexer) accept(valid string) bool {
  100. if strings.ContainsRune(valid, l.next()) {
  101. return true
  102. }
  103. l.backup()
  104. return false
  105. }
  106. // acceptRun consumes a run of runes from the valid set.
  107. func (l *lexer) acceptRun(valid string) {
  108. for strings.ContainsRune(valid, l.next()) {
  109. }
  110. l.backup()
  111. }
  112. // acceptRunUntil consumes a run of runes up to a terminator.
  113. func (l *lexer) acceptRunUntil(term rune) {
  114. for term != l.next() {
  115. }
  116. l.backup()
  117. }
  118. // hasText returns true if the current parsed text is not empty.
  119. func (l *lexer) isNotEmpty() bool {
  120. return l.pos > l.start
  121. }
  122. // lineNumber reports which line we're on, based on the position of
  123. // the previous item returned by nextItem. Doing it this way
  124. // means we don't have to worry about peek double counting.
  125. func (l *lexer) lineNumber() int {
  126. return 1 + strings.Count(l.input[:l.lastPos], "\n")
  127. }
  128. // errorf returns an error token and terminates the scan by passing
  129. // back a nil pointer that will be the next state, terminating l.nextItem.
  130. func (l *lexer) errorf(format string, args ...interface{}) stateFn {
  131. l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
  132. return nil
  133. }
  134. // nextItem returns the next item from the input.
  135. func (l *lexer) nextItem() item {
  136. i := <-l.items
  137. l.lastPos = i.pos
  138. return i
  139. }
  140. // lex creates a new scanner for the input string.
  141. func lex(input string) *lexer {
  142. l := &lexer{
  143. input: input,
  144. items: make(chan item),
  145. runes: make([]rune, 0, 32),
  146. }
  147. go l.run()
  148. return l
  149. }
  150. // run runs the state machine for the lexer.
  151. func (l *lexer) run() {
  152. for l.state = lexBeforeKey(l); l.state != nil; {
  153. l.state = l.state(l)
  154. }
  155. }
  156. // state functions
  157. // lexBeforeKey scans until a key begins.
  158. func lexBeforeKey(l *lexer) stateFn {
  159. switch r := l.next(); {
  160. case isEOF(r):
  161. l.emit(itemEOF)
  162. return nil
  163. case isEOL(r):
  164. l.ignore()
  165. return lexBeforeKey
  166. case isComment(r):
  167. return lexComment
  168. case isWhitespace(r):
  169. l.ignore()
  170. return lexBeforeKey
  171. default:
  172. l.backup()
  173. return lexKey
  174. }
  175. }
  176. // lexComment scans a comment line. The comment character has already been scanned.
  177. func lexComment(l *lexer) stateFn {
  178. l.acceptRun(whitespace)
  179. l.ignore()
  180. for {
  181. switch r := l.next(); {
  182. case isEOF(r):
  183. l.ignore()
  184. l.emit(itemEOF)
  185. return nil
  186. case isEOL(r):
  187. l.emit(itemComment)
  188. return lexBeforeKey
  189. default:
  190. l.appendRune(r)
  191. }
  192. }
  193. }
  194. // lexKey scans the key up to a delimiter
  195. func lexKey(l *lexer) stateFn {
  196. var r rune
  197. Loop:
  198. for {
  199. switch r = l.next(); {
  200. case isEscape(r):
  201. err := l.scanEscapeSequence()
  202. if err != nil {
  203. return l.errorf(err.Error())
  204. }
  205. case isEndOfKey(r):
  206. l.backup()
  207. break Loop
  208. case isEOF(r):
  209. break Loop
  210. default:
  211. l.appendRune(r)
  212. }
  213. }
  214. if len(l.runes) > 0 {
  215. l.emit(itemKey)
  216. }
  217. if isEOF(r) {
  218. l.emit(itemEOF)
  219. return nil
  220. }
  221. return lexBeforeValue
  222. }
  223. // lexBeforeValue scans the delimiter between key and value.
  224. // Leading and trailing whitespace is ignored.
  225. // We expect to be just after the key.
  226. func lexBeforeValue(l *lexer) stateFn {
  227. l.acceptRun(whitespace)
  228. l.accept(":=")
  229. l.acceptRun(whitespace)
  230. l.ignore()
  231. return lexValue
  232. }
  233. // lexValue scans text until the end of the line. We expect to be just after the delimiter.
  234. func lexValue(l *lexer) stateFn {
  235. for {
  236. switch r := l.next(); {
  237. case isEscape(r):
  238. if isEOL(l.peek()) {
  239. l.next()
  240. l.acceptRun(whitespace)
  241. } else {
  242. err := l.scanEscapeSequence()
  243. if err != nil {
  244. return l.errorf(err.Error())
  245. }
  246. }
  247. case isEOL(r):
  248. l.emit(itemValue)
  249. l.ignore()
  250. return lexBeforeKey
  251. case isEOF(r):
  252. l.emit(itemValue)
  253. l.emit(itemEOF)
  254. return nil
  255. default:
  256. l.appendRune(r)
  257. }
  258. }
  259. }
  260. // scanEscapeSequence scans either one of the escaped characters
  261. // or a unicode literal. We expect to be after the escape character.
  262. func (l *lexer) scanEscapeSequence() error {
  263. switch r := l.next(); {
  264. case isEscapedCharacter(r):
  265. l.appendRune(decodeEscapedCharacter(r))
  266. return nil
  267. case atUnicodeLiteral(r):
  268. return l.scanUnicodeLiteral()
  269. case isEOF(r):
  270. return fmt.Errorf("premature EOF")
  271. // silently drop the escape character and append the rune as is
  272. default:
  273. l.appendRune(r)
  274. return nil
  275. }
  276. }
  277. // scans a unicode literal in the form \uXXXX. We expect to be after the \u.
  278. func (l *lexer) scanUnicodeLiteral() error {
  279. // scan the digits
  280. d := make([]rune, 4)
  281. for i := 0; i < 4; i++ {
  282. d[i] = l.next()
  283. if d[i] == eof || !strings.ContainsRune("0123456789abcdefABCDEF", d[i]) {
  284. return fmt.Errorf("invalid unicode literal")
  285. }
  286. }
  287. // decode the digits into a rune
  288. r, err := strconv.ParseInt(string(d), 16, 0)
  289. if err != nil {
  290. return err
  291. }
  292. l.appendRune(rune(r))
  293. return nil
  294. }
  295. // decodeEscapedCharacter returns the unescaped rune. We expect to be after the escape character.
  296. func decodeEscapedCharacter(r rune) rune {
  297. switch r {
  298. case 'f':
  299. return '\f'
  300. case 'n':
  301. return '\n'
  302. case 'r':
  303. return '\r'
  304. case 't':
  305. return '\t'
  306. default:
  307. return r
  308. }
  309. }
  310. // atUnicodeLiteral reports whether we are at a unicode literal.
  311. // The escape character has already been consumed.
  312. func atUnicodeLiteral(r rune) bool {
  313. return r == 'u'
  314. }
  315. // isComment reports whether we are at the start of a comment.
  316. func isComment(r rune) bool {
  317. return r == '#' || r == '!'
  318. }
  319. // isEndOfKey reports whether the rune terminates the current key.
  320. func isEndOfKey(r rune) bool {
  321. return strings.ContainsRune(" \f\t\r\n:=", r)
  322. }
  323. // isEOF reports whether we are at EOF.
  324. func isEOF(r rune) bool {
  325. return r == eof
  326. }
  327. // isEOL reports whether we are at a new line character.
  328. func isEOL(r rune) bool {
  329. return r == '\n' || r == '\r'
  330. }
  331. // isEscape reports whether the rune is the escape character which
  332. // prefixes unicode literals and other escaped characters.
  333. func isEscape(r rune) bool {
  334. return r == '\\'
  335. }
  336. // isEscapedCharacter reports whether we are at one of the characters that need escaping.
  337. // The escape character has already been consumed.
  338. func isEscapedCharacter(r rune) bool {
  339. return strings.ContainsRune(" :=fnrt", r)
  340. }
  341. // isWhitespace reports whether the rune is a whitespace character.
  342. func isWhitespace(r rune) bool {
  343. return strings.ContainsRune(whitespace, r)
  344. }
上海开阖软件有限公司 沪ICP备12045867号-1