|
- package revision
-
- import (
- "bufio"
- "io"
- "unicode"
- )
-
// runeCategoryValidator is a predicate over a single rune: it reports
// whether that rune belongs to the category the validator stands for.
type runeCategoryValidator func(ch rune) bool
-
- // tokenizeExpression aggegates a series of runes matching check predicate into a single
- // string and provides given tokenType as token type
- func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
- var data []rune
- data = append(data, ch)
-
- for {
- c, _, err := r.ReadRune()
-
- if c == zeroRune {
- break
- }
-
- if err != nil {
- return tokenError, "", err
- }
-
- if check(c) {
- data = append(data, c)
- } else {
- err := r.UnreadRune()
-
- if err != nil {
- return tokenError, "", err
- }
-
- return tokenType, string(data), nil
- }
- }
-
- return tokenType, string(data), nil
- }
-
// zeroRune is the zero-valued rune (U+0000). Runes read from the input are
// compared against it to detect that nothing more should be tokenized.
var zeroRune rune
-
// scanner is a lexical scanner: it holds the buffered reader that the
// input is consumed from.
type scanner struct {
	// r is the buffered source the scanner reads from.
	r *bufio.Reader
}
-
- // newScanner returns a new instance of scanner.
- func newScanner(r io.Reader) *scanner {
- return &scanner{r: bufio.NewReader(r)}
- }
-
- // Scan extracts tokens and their strings counterpart
- // from the reader
- func (s *scanner) scan() (token, string, error) {
- ch, _, err := s.r.ReadRune()
-
- if err != nil && err != io.EOF {
- return tokenError, "", err
- }
-
- switch ch {
- case zeroRune:
- return eof, "", nil
- case ':':
- return colon, string(ch), nil
- case '~':
- return tilde, string(ch), nil
- case '^':
- return caret, string(ch), nil
- case '.':
- return dot, string(ch), nil
- case '/':
- return slash, string(ch), nil
- case '{':
- return obrace, string(ch), nil
- case '}':
- return cbrace, string(ch), nil
- case '-':
- return minus, string(ch), nil
- case '@':
- return at, string(ch), nil
- case '\\':
- return aslash, string(ch), nil
- case '?':
- return qmark, string(ch), nil
- case '*':
- return asterisk, string(ch), nil
- case '[':
- return obracket, string(ch), nil
- case '!':
- return emark, string(ch), nil
- }
-
- if unicode.IsSpace(ch) {
- return space, string(ch), nil
- }
-
- if unicode.IsControl(ch) {
- return control, string(ch), nil
- }
-
- if unicode.IsLetter(ch) {
- return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
- }
-
- if unicode.IsNumber(ch) {
- return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
- }
-
- return tokenError, string(ch), nil
- }
|