本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

120 lines
3.4KB

  1. // Copyright (c) 2017 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package regexp
  15. import (
  16. "fmt"
  17. "regexp/syntax"
  18. )
  19. // ErrNoEmpty returned when "zero width assertions" are used
  20. var ErrNoEmpty = fmt.Errorf("zero width assertions not allowed")
  21. // ErrNoWordBoundary returned when word boundaries are used
  22. var ErrNoWordBoundary = fmt.Errorf("word boundaries are not allowed")
  23. // ErrNoBytes returned when byte literals are used
  24. var ErrNoBytes = fmt.Errorf("byte literals are not allowed")
  25. // ErrNoLazy returned when lazy quantifiers are used
  26. var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
  27. // ErrCompiledTooBig returned when regular expression parses into
  28. // too many instructions
  29. var ErrCompiledTooBig = fmt.Errorf("too many instructions")
  30. var DefaultLimit = uint(10 * (1 << 20))
  31. // Regexp implements the vellum.Automaton interface for matcing a user
  32. // specified regular expression.
  33. type Regexp struct {
  34. orig string
  35. dfa *dfa
  36. }
  37. // NewRegexp creates a new Regular Expression automaton with the specified
  38. // expression. By default it is limited to approximately 10MB for the
  39. // compiled finite state automaton. If this size is exceeded,
  40. // ErrCompiledTooBig will be returned.
  41. func New(expr string) (*Regexp, error) {
  42. return NewWithLimit(expr, DefaultLimit)
  43. }
  44. // NewRegexpWithLimit creates a new Regular Expression automaton with
  45. // the specified expression. The size of the compiled finite state
  46. // automaton exceeds the user specified size, ErrCompiledTooBig will be
  47. // returned.
  48. func NewWithLimit(expr string, size uint) (*Regexp, error) {
  49. parsed, err := syntax.Parse(expr, syntax.Perl)
  50. if err != nil {
  51. return nil, err
  52. }
  53. return NewParsedWithLimit(expr, parsed, size)
  54. }
  55. func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
  56. compiler := newCompiler(size)
  57. insts, err := compiler.compile(parsed)
  58. if err != nil {
  59. return nil, err
  60. }
  61. dfaBuilder := newDfaBuilder(insts)
  62. dfa, err := dfaBuilder.build()
  63. if err != nil {
  64. return nil, err
  65. }
  66. return &Regexp{
  67. orig: expr,
  68. dfa: dfa,
  69. }, nil
  70. }
  71. // Start returns the start state of this automaton.
  72. func (r *Regexp) Start() int {
  73. return 1
  74. }
  75. // IsMatch returns if the specified state is a matching state.
  76. func (r *Regexp) IsMatch(s int) bool {
  77. if s < len(r.dfa.states) {
  78. return r.dfa.states[s].match
  79. }
  80. return false
  81. }
  82. // CanMatch returns if the specified state can ever transition to a matching
  83. // state.
  84. func (r *Regexp) CanMatch(s int) bool {
  85. if s < len(r.dfa.states) && s > 0 {
  86. return true
  87. }
  88. return false
  89. }
  90. // WillAlwaysMatch returns if the specified state will always end in a
  91. // matching state.
  92. func (r *Regexp) WillAlwaysMatch(int) bool {
  93. return false
  94. }
  95. // Accept returns the new state, resulting from the transition byte b
  96. // when currently in the state s.
  97. func (r *Regexp) Accept(s int, b byte) int {
  98. if s < len(r.dfa.states) {
  99. return r.dfa.states[s].next[b]
  100. }
  101. return 0
  102. }
上海开阖软件有限公司 沪ICP备12045867号-1