本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

440 lines
9.2KB

  1. // Copyright (C) MongoDB, Inc. 2017-present.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"); you may
  4. // not use this file except in compliance with the License. You may obtain
  5. // a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  6. package bsonrw
  7. import (
  8. "bytes"
  9. "errors"
  10. "fmt"
  11. "io"
  12. "math"
  13. "strconv"
  14. "strings"
  15. "unicode"
  16. )
  17. type jsonTokenType byte
  18. const (
  19. jttBeginObject jsonTokenType = iota
  20. jttEndObject
  21. jttBeginArray
  22. jttEndArray
  23. jttColon
  24. jttComma
  25. jttInt32
  26. jttInt64
  27. jttDouble
  28. jttString
  29. jttBool
  30. jttNull
  31. jttEOF
  32. )
  33. type jsonToken struct {
  34. t jsonTokenType
  35. v interface{}
  36. p int
  37. }
  38. type jsonScanner struct {
  39. r io.Reader
  40. buf []byte
  41. pos int
  42. lastReadErr error
  43. }
  44. // nextToken returns the next JSON token if one exists. A token is a character
  45. // of the JSON grammar, a number, a string, or a literal.
  46. func (js *jsonScanner) nextToken() (*jsonToken, error) {
  47. c, err := js.readNextByte()
  48. // keep reading until a non-space is encountered (break on read error or EOF)
  49. for isWhiteSpace(c) && err == nil {
  50. c, err = js.readNextByte()
  51. }
  52. if err == io.EOF {
  53. return &jsonToken{t: jttEOF}, nil
  54. } else if err != nil {
  55. return nil, err
  56. }
  57. // switch on the character
  58. switch c {
  59. case '{':
  60. return &jsonToken{t: jttBeginObject, v: byte('{'), p: js.pos - 1}, nil
  61. case '}':
  62. return &jsonToken{t: jttEndObject, v: byte('}'), p: js.pos - 1}, nil
  63. case '[':
  64. return &jsonToken{t: jttBeginArray, v: byte('['), p: js.pos - 1}, nil
  65. case ']':
  66. return &jsonToken{t: jttEndArray, v: byte(']'), p: js.pos - 1}, nil
  67. case ':':
  68. return &jsonToken{t: jttColon, v: byte(':'), p: js.pos - 1}, nil
  69. case ',':
  70. return &jsonToken{t: jttComma, v: byte(','), p: js.pos - 1}, nil
  71. case '"': // RFC-8259 only allows for double quotes (") not single (')
  72. return js.scanString()
  73. default:
  74. // check if it's a number
  75. if c == '-' || isDigit(c) {
  76. return js.scanNumber(c)
  77. } else if c == 't' || c == 'f' || c == 'n' {
  78. // maybe a literal
  79. return js.scanLiteral(c)
  80. } else {
  81. return nil, fmt.Errorf("invalid JSON input. Position: %d. Character: %c", js.pos-1, c)
  82. }
  83. }
  84. }
  85. // readNextByte attempts to read the next byte from the buffer. If the buffer
  86. // has been exhausted, this function calls readIntoBuf, thus refilling the
  87. // buffer and resetting the read position to 0
  88. func (js *jsonScanner) readNextByte() (byte, error) {
  89. if js.pos >= len(js.buf) {
  90. err := js.readIntoBuf()
  91. if err != nil {
  92. return 0, err
  93. }
  94. }
  95. b := js.buf[js.pos]
  96. js.pos++
  97. return b, nil
  98. }
  99. // readNNextBytes reads n bytes into dst, starting at offset
  100. func (js *jsonScanner) readNNextBytes(dst []byte, n, offset int) error {
  101. var err error
  102. for i := 0; i < n; i++ {
  103. dst[i+offset], err = js.readNextByte()
  104. if err != nil {
  105. return err
  106. }
  107. }
  108. return nil
  109. }
  110. // readIntoBuf reads up to 512 bytes from the scanner's io.Reader into the buffer
  111. func (js *jsonScanner) readIntoBuf() error {
  112. if js.lastReadErr != nil {
  113. js.buf = js.buf[:0]
  114. js.pos = 0
  115. return js.lastReadErr
  116. }
  117. if cap(js.buf) == 0 {
  118. js.buf = make([]byte, 0, 512)
  119. }
  120. n, err := js.r.Read(js.buf[:cap(js.buf)])
  121. if err != nil {
  122. js.lastReadErr = err
  123. if n > 0 {
  124. err = nil
  125. }
  126. }
  127. js.buf = js.buf[:n]
  128. js.pos = 0
  129. return err
  130. }
  131. func isWhiteSpace(c byte) bool {
  132. return c == ' ' || c == '\t' || c == '\r' || c == '\n'
  133. }
  134. func isDigit(c byte) bool {
  135. return unicode.IsDigit(rune(c))
  136. }
  137. func isValueTerminator(c byte) bool {
  138. return c == ',' || c == '}' || c == ']' || isWhiteSpace(c)
  139. }
  140. // scanString reads from an opening '"' to a closing '"' and handles escaped characters
  141. func (js *jsonScanner) scanString() (*jsonToken, error) {
  142. var b bytes.Buffer
  143. var c byte
  144. var err error
  145. p := js.pos - 1
  146. for {
  147. c, err = js.readNextByte()
  148. if err != nil {
  149. if err == io.EOF {
  150. return nil, errors.New("end of input in JSON string")
  151. }
  152. return nil, err
  153. }
  154. switch c {
  155. case '\\':
  156. c, err = js.readNextByte()
  157. switch c {
  158. case '"', '\\', '/', '\'':
  159. b.WriteByte(c)
  160. case 'b':
  161. b.WriteByte('\b')
  162. case 'f':
  163. b.WriteByte('\f')
  164. case 'n':
  165. b.WriteByte('\n')
  166. case 'r':
  167. b.WriteByte('\r')
  168. case 't':
  169. b.WriteByte('\t')
  170. case 'u':
  171. us := make([]byte, 4)
  172. err = js.readNNextBytes(us, 4, 0)
  173. if err != nil {
  174. return nil, fmt.Errorf("invalid unicode sequence in JSON string: %s", us)
  175. }
  176. s := fmt.Sprintf(`\u%s`, us)
  177. s, err = strconv.Unquote(strings.Replace(strconv.Quote(s), `\\u`, `\u`, 1))
  178. if err != nil {
  179. return nil, err
  180. }
  181. b.WriteString(s)
  182. default:
  183. return nil, fmt.Errorf("invalid escape sequence in JSON string '\\%c'", c)
  184. }
  185. case '"':
  186. return &jsonToken{t: jttString, v: b.String(), p: p}, nil
  187. default:
  188. b.WriteByte(c)
  189. }
  190. }
  191. }
  192. // scanLiteral reads an unquoted sequence of characters and determines if it is one of
  193. // three valid JSON literals (true, false, null); if so, it returns the appropriate
  194. // jsonToken; otherwise, it returns an error
  195. func (js *jsonScanner) scanLiteral(first byte) (*jsonToken, error) {
  196. p := js.pos - 1
  197. lit := make([]byte, 4)
  198. lit[0] = first
  199. err := js.readNNextBytes(lit, 3, 1)
  200. if err != nil {
  201. return nil, err
  202. }
  203. c5, err := js.readNextByte()
  204. if bytes.Equal([]byte("true"), lit) && (isValueTerminator(c5) || err == io.EOF) {
  205. js.pos = int(math.Max(0, float64(js.pos-1)))
  206. return &jsonToken{t: jttBool, v: true, p: p}, nil
  207. } else if bytes.Equal([]byte("null"), lit) && (isValueTerminator(c5) || err == io.EOF) {
  208. js.pos = int(math.Max(0, float64(js.pos-1)))
  209. return &jsonToken{t: jttNull, v: nil, p: p}, nil
  210. } else if bytes.Equal([]byte("fals"), lit) {
  211. if c5 == 'e' {
  212. c5, err = js.readNextByte()
  213. if isValueTerminator(c5) || err == io.EOF {
  214. js.pos = int(math.Max(0, float64(js.pos-1)))
  215. return &jsonToken{t: jttBool, v: false, p: p}, nil
  216. }
  217. }
  218. }
  219. return nil, fmt.Errorf("invalid JSON literal. Position: %d, literal: %s", p, lit)
  220. }
  221. type numberScanState byte
  222. const (
  223. nssSawLeadingMinus numberScanState = iota
  224. nssSawLeadingZero
  225. nssSawIntegerDigits
  226. nssSawDecimalPoint
  227. nssSawFractionDigits
  228. nssSawExponentLetter
  229. nssSawExponentSign
  230. nssSawExponentDigits
  231. nssDone
  232. nssInvalid
  233. )
  234. // scanNumber reads a JSON number (according to RFC-8259)
  235. func (js *jsonScanner) scanNumber(first byte) (*jsonToken, error) {
  236. var b bytes.Buffer
  237. var s numberScanState
  238. var c byte
  239. var err error
  240. t := jttInt64 // assume it's an int64 until the type can be determined
  241. start := js.pos - 1
  242. b.WriteByte(first)
  243. switch first {
  244. case '-':
  245. s = nssSawLeadingMinus
  246. case '0':
  247. s = nssSawLeadingZero
  248. default:
  249. s = nssSawIntegerDigits
  250. }
  251. for {
  252. c, err = js.readNextByte()
  253. if err != nil && err != io.EOF {
  254. return nil, err
  255. }
  256. switch s {
  257. case nssSawLeadingMinus:
  258. switch c {
  259. case '0':
  260. s = nssSawLeadingZero
  261. b.WriteByte(c)
  262. default:
  263. if isDigit(c) {
  264. s = nssSawIntegerDigits
  265. b.WriteByte(c)
  266. } else {
  267. s = nssInvalid
  268. }
  269. }
  270. case nssSawLeadingZero:
  271. switch c {
  272. case '.':
  273. s = nssSawDecimalPoint
  274. b.WriteByte(c)
  275. case 'e', 'E':
  276. s = nssSawExponentLetter
  277. b.WriteByte(c)
  278. case '}', ']', ',':
  279. s = nssDone
  280. default:
  281. if isWhiteSpace(c) || err == io.EOF {
  282. s = nssDone
  283. } else {
  284. s = nssInvalid
  285. }
  286. }
  287. case nssSawIntegerDigits:
  288. switch c {
  289. case '.':
  290. s = nssSawDecimalPoint
  291. b.WriteByte(c)
  292. case 'e', 'E':
  293. s = nssSawExponentLetter
  294. b.WriteByte(c)
  295. case '}', ']', ',':
  296. s = nssDone
  297. default:
  298. if isWhiteSpace(c) || err == io.EOF {
  299. s = nssDone
  300. } else if isDigit(c) {
  301. s = nssSawIntegerDigits
  302. b.WriteByte(c)
  303. } else {
  304. s = nssInvalid
  305. }
  306. }
  307. case nssSawDecimalPoint:
  308. t = jttDouble
  309. if isDigit(c) {
  310. s = nssSawFractionDigits
  311. b.WriteByte(c)
  312. } else {
  313. s = nssInvalid
  314. }
  315. case nssSawFractionDigits:
  316. switch c {
  317. case 'e', 'E':
  318. s = nssSawExponentLetter
  319. b.WriteByte(c)
  320. case '}', ']', ',':
  321. s = nssDone
  322. default:
  323. if isWhiteSpace(c) || err == io.EOF {
  324. s = nssDone
  325. } else if isDigit(c) {
  326. s = nssSawFractionDigits
  327. b.WriteByte(c)
  328. } else {
  329. s = nssInvalid
  330. }
  331. }
  332. case nssSawExponentLetter:
  333. t = jttDouble
  334. switch c {
  335. case '+', '-':
  336. s = nssSawExponentSign
  337. b.WriteByte(c)
  338. default:
  339. if isDigit(c) {
  340. s = nssSawExponentDigits
  341. b.WriteByte(c)
  342. } else {
  343. s = nssInvalid
  344. }
  345. }
  346. case nssSawExponentSign:
  347. if isDigit(c) {
  348. s = nssSawExponentDigits
  349. b.WriteByte(c)
  350. } else {
  351. s = nssInvalid
  352. }
  353. case nssSawExponentDigits:
  354. switch c {
  355. case '}', ']', ',':
  356. s = nssDone
  357. default:
  358. if isWhiteSpace(c) || err == io.EOF {
  359. s = nssDone
  360. } else if isDigit(c) {
  361. s = nssSawExponentDigits
  362. b.WriteByte(c)
  363. } else {
  364. s = nssInvalid
  365. }
  366. }
  367. }
  368. switch s {
  369. case nssInvalid:
  370. return nil, fmt.Errorf("invalid JSON number. Position: %d", start)
  371. case nssDone:
  372. js.pos = int(math.Max(0, float64(js.pos-1)))
  373. if t != jttDouble {
  374. v, err := strconv.ParseInt(b.String(), 10, 64)
  375. if err == nil {
  376. if v < math.MinInt32 || v > math.MaxInt32 {
  377. return &jsonToken{t: jttInt64, v: v, p: start}, nil
  378. }
  379. return &jsonToken{t: jttInt32, v: int32(v), p: start}, nil
  380. }
  381. }
  382. v, err := strconv.ParseFloat(b.String(), 64)
  383. if err != nil {
  384. return nil, err
  385. }
  386. return &jsonToken{t: jttDouble, v: v, p: start}, nil
  387. }
  388. }
  389. }
上海开阖软件有限公司 沪ICP备12045867号-1