|
- // Copyright (c) 2017 Couchbase, Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
-
- package regexp
-
- import (
- "encoding/binary"
- "fmt"
- )
-
- // StateLimit is the maximum number of states allowed
- const StateLimit = 10000
-
- // ErrTooManyStates is returned if you attempt to build a Levenshtein
- // automaton which requires too many states.
- var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
- StateLimit)
-
- type dfaBuilder struct {
- dfa *dfa
- cache map[string]int
- keyBuf []byte
- }
-
- func newDfaBuilder(insts prog) *dfaBuilder {
- d := &dfaBuilder{
- dfa: &dfa{
- insts: insts,
- states: make([]state, 0, 16),
- },
- cache: make(map[string]int, 1024),
- }
- // add 0 state that is invalid
- d.dfa.states = append(d.dfa.states, state{
- next: make([]int, 256),
- match: false,
- })
- return d
- }
-
- func (d *dfaBuilder) build() (*dfa, error) {
- cur := newSparseSet(uint(len(d.dfa.insts)))
- next := newSparseSet(uint(len(d.dfa.insts)))
-
- d.dfa.add(cur, 0)
- ns, instsReuse := d.cachedState(cur, nil)
- states := intStack{ns}
- seen := make(map[int]struct{})
- var s int
- states, s = states.Pop()
- for s != 0 {
- for b := 0; b < 256; b++ {
- var ns int
- ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
- if ns != 0 {
- if _, ok := seen[ns]; !ok {
- seen[ns] = struct{}{}
- states = states.Push(ns)
- }
- }
- if len(d.dfa.states) > StateLimit {
- return nil, ErrTooManyStates
- }
- }
- states, s = states.Pop()
- }
- return d.dfa, nil
- }
-
- func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
- int, []uint) {
- cur.Clear()
- for _, ip := range d.dfa.states[state].insts {
- cur.Add(ip)
- }
- d.dfa.run(cur, next, b)
- var nextState int
- nextState, instsReuse = d.cachedState(next, instsReuse)
- d.dfa.states[state].next[b] = nextState
- return nextState, instsReuse
- }
-
- func instsKey(insts []uint, buf []byte) []byte {
- if cap(buf) < 8*len(insts) {
- buf = make([]byte, 8*len(insts))
- } else {
- buf = buf[0 : 8*len(insts)]
- }
- for i, inst := range insts {
- binary.LittleEndian.PutUint64(buf[i*8:], uint64(inst))
- }
- return buf
- }
-
- func (d *dfaBuilder) cachedState(set *sparseSet,
- instsReuse []uint) (int, []uint) {
- insts := instsReuse[:0]
- if cap(insts) == 0 {
- insts = make([]uint, 0, set.Len())
- }
- var isMatch bool
- for i := uint(0); i < uint(set.Len()); i++ {
- ip := set.Get(i)
- switch d.dfa.insts[ip].op {
- case OpRange:
- insts = append(insts, ip)
- case OpMatch:
- isMatch = true
- insts = append(insts, ip)
- }
- }
- if len(insts) == 0 {
- return 0, insts
- }
- d.keyBuf = instsKey(insts, d.keyBuf)
- v, ok := d.cache[string(d.keyBuf)]
- if ok {
- return v, insts
- }
- d.dfa.states = append(d.dfa.states, state{
- insts: insts,
- next: make([]int, 256),
- match: isMatch,
- })
- newV := len(d.dfa.states) - 1
- d.cache[string(d.keyBuf)] = newV
- return newV, nil
- }
-
- type dfa struct {
- insts prog
- states []state
- }
-
- func (d *dfa) add(set *sparseSet, ip uint) {
- if set.Contains(ip) {
- return
- }
- set.Add(ip)
- switch d.insts[ip].op {
- case OpJmp:
- d.add(set, d.insts[ip].to)
- case OpSplit:
- d.add(set, d.insts[ip].splitA)
- d.add(set, d.insts[ip].splitB)
- }
- }
-
- func (d *dfa) run(from, to *sparseSet, b byte) bool {
- to.Clear()
- var isMatch bool
- for i := uint(0); i < uint(from.Len()); i++ {
- ip := from.Get(i)
- switch d.insts[ip].op {
- case OpMatch:
- isMatch = true
- case OpRange:
- if d.insts[ip].rangeStart <= b &&
- b <= d.insts[ip].rangeEnd {
- d.add(to, ip+1)
- }
- }
- }
- return isMatch
- }
-
- type state struct {
- insts []uint
- next []int
- match bool
- }
-
- type intStack []int
-
- func (s intStack) Push(v int) intStack {
- return append(s, v)
- }
-
- func (s intStack) Pop() (intStack, int) {
- l := len(s)
- if l < 1 {
- return s, 0
- }
- return s[:l-1], s[l-1]
- }
|