本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

526 lines
11KB

  1. package compiler
  2. // TODO use constructor with all matchers, and to their structs private
  3. // TODO glue multiple Text nodes (like after QuoteMeta)
  4. import (
  5. "fmt"
  6. "reflect"
  7. "github.com/gobwas/glob/match"
  8. "github.com/gobwas/glob/syntax/ast"
  9. "github.com/gobwas/glob/util/runes"
  10. )
  11. func optimizeMatcher(matcher match.Matcher) match.Matcher {
  12. switch m := matcher.(type) {
  13. case match.Any:
  14. if len(m.Separators) == 0 {
  15. return match.NewSuper()
  16. }
  17. case match.AnyOf:
  18. if len(m.Matchers) == 1 {
  19. return m.Matchers[0]
  20. }
  21. return m
  22. case match.List:
  23. if m.Not == false && len(m.List) == 1 {
  24. return match.NewText(string(m.List))
  25. }
  26. return m
  27. case match.BTree:
  28. m.Left = optimizeMatcher(m.Left)
  29. m.Right = optimizeMatcher(m.Right)
  30. r, ok := m.Value.(match.Text)
  31. if !ok {
  32. return m
  33. }
  34. var (
  35. leftNil = m.Left == nil
  36. rightNil = m.Right == nil
  37. )
  38. if leftNil && rightNil {
  39. return match.NewText(r.Str)
  40. }
  41. _, leftSuper := m.Left.(match.Super)
  42. lp, leftPrefix := m.Left.(match.Prefix)
  43. la, leftAny := m.Left.(match.Any)
  44. _, rightSuper := m.Right.(match.Super)
  45. rs, rightSuffix := m.Right.(match.Suffix)
  46. ra, rightAny := m.Right.(match.Any)
  47. switch {
  48. case leftSuper && rightSuper:
  49. return match.NewContains(r.Str, false)
  50. case leftSuper && rightNil:
  51. return match.NewSuffix(r.Str)
  52. case rightSuper && leftNil:
  53. return match.NewPrefix(r.Str)
  54. case leftNil && rightSuffix:
  55. return match.NewPrefixSuffix(r.Str, rs.Suffix)
  56. case rightNil && leftPrefix:
  57. return match.NewPrefixSuffix(lp.Prefix, r.Str)
  58. case rightNil && leftAny:
  59. return match.NewSuffixAny(r.Str, la.Separators)
  60. case leftNil && rightAny:
  61. return match.NewPrefixAny(r.Str, ra.Separators)
  62. }
  63. return m
  64. }
  65. return matcher
  66. }
  67. func compileMatchers(matchers []match.Matcher) (match.Matcher, error) {
  68. if len(matchers) == 0 {
  69. return nil, fmt.Errorf("compile error: need at least one matcher")
  70. }
  71. if len(matchers) == 1 {
  72. return matchers[0], nil
  73. }
  74. if m := glueMatchers(matchers); m != nil {
  75. return m, nil
  76. }
  77. idx := -1
  78. maxLen := -1
  79. var val match.Matcher
  80. for i, matcher := range matchers {
  81. if l := matcher.Len(); l != -1 && l >= maxLen {
  82. maxLen = l
  83. idx = i
  84. val = matcher
  85. }
  86. }
  87. if val == nil { // not found matcher with static length
  88. r, err := compileMatchers(matchers[1:])
  89. if err != nil {
  90. return nil, err
  91. }
  92. return match.NewBTree(matchers[0], nil, r), nil
  93. }
  94. left := matchers[:idx]
  95. var right []match.Matcher
  96. if len(matchers) > idx+1 {
  97. right = matchers[idx+1:]
  98. }
  99. var l, r match.Matcher
  100. var err error
  101. if len(left) > 0 {
  102. l, err = compileMatchers(left)
  103. if err != nil {
  104. return nil, err
  105. }
  106. }
  107. if len(right) > 0 {
  108. r, err = compileMatchers(right)
  109. if err != nil {
  110. return nil, err
  111. }
  112. }
  113. return match.NewBTree(val, l, r), nil
  114. }
  115. func glueMatchers(matchers []match.Matcher) match.Matcher {
  116. if m := glueMatchersAsEvery(matchers); m != nil {
  117. return m
  118. }
  119. if m := glueMatchersAsRow(matchers); m != nil {
  120. return m
  121. }
  122. return nil
  123. }
  124. func glueMatchersAsRow(matchers []match.Matcher) match.Matcher {
  125. if len(matchers) <= 1 {
  126. return nil
  127. }
  128. var (
  129. c []match.Matcher
  130. l int
  131. )
  132. for _, matcher := range matchers {
  133. if ml := matcher.Len(); ml == -1 {
  134. return nil
  135. } else {
  136. c = append(c, matcher)
  137. l += ml
  138. }
  139. }
  140. return match.NewRow(l, c...)
  141. }
  142. func glueMatchersAsEvery(matchers []match.Matcher) match.Matcher {
  143. if len(matchers) <= 1 {
  144. return nil
  145. }
  146. var (
  147. hasAny bool
  148. hasSuper bool
  149. hasSingle bool
  150. min int
  151. separator []rune
  152. )
  153. for i, matcher := range matchers {
  154. var sep []rune
  155. switch m := matcher.(type) {
  156. case match.Super:
  157. sep = []rune{}
  158. hasSuper = true
  159. case match.Any:
  160. sep = m.Separators
  161. hasAny = true
  162. case match.Single:
  163. sep = m.Separators
  164. hasSingle = true
  165. min++
  166. case match.List:
  167. if !m.Not {
  168. return nil
  169. }
  170. sep = m.List
  171. hasSingle = true
  172. min++
  173. default:
  174. return nil
  175. }
  176. // initialize
  177. if i == 0 {
  178. separator = sep
  179. }
  180. if runes.Equal(sep, separator) {
  181. continue
  182. }
  183. return nil
  184. }
  185. if hasSuper && !hasAny && !hasSingle {
  186. return match.NewSuper()
  187. }
  188. if hasAny && !hasSuper && !hasSingle {
  189. return match.NewAny(separator)
  190. }
  191. if (hasAny || hasSuper) && min > 0 && len(separator) == 0 {
  192. return match.NewMin(min)
  193. }
  194. every := match.NewEveryOf()
  195. if min > 0 {
  196. every.Add(match.NewMin(min))
  197. if !hasAny && !hasSuper {
  198. every.Add(match.NewMax(min))
  199. }
  200. }
  201. if len(separator) > 0 {
  202. every.Add(match.NewContains(string(separator), true))
  203. }
  204. return every
  205. }
  206. func minimizeMatchers(matchers []match.Matcher) []match.Matcher {
  207. var done match.Matcher
  208. var left, right, count int
  209. for l := 0; l < len(matchers); l++ {
  210. for r := len(matchers); r > l; r-- {
  211. if glued := glueMatchers(matchers[l:r]); glued != nil {
  212. var swap bool
  213. if done == nil {
  214. swap = true
  215. } else {
  216. cl, gl := done.Len(), glued.Len()
  217. swap = cl > -1 && gl > -1 && gl > cl
  218. swap = swap || count < r-l
  219. }
  220. if swap {
  221. done = glued
  222. left = l
  223. right = r
  224. count = r - l
  225. }
  226. }
  227. }
  228. }
  229. if done == nil {
  230. return matchers
  231. }
  232. next := append(append([]match.Matcher{}, matchers[:left]...), done)
  233. if right < len(matchers) {
  234. next = append(next, matchers[right:]...)
  235. }
  236. if len(next) == len(matchers) {
  237. return next
  238. }
  239. return minimizeMatchers(next)
  240. }
  241. // minimizeAnyOf tries to apply some heuristics to minimize number of nodes in given tree
  242. func minimizeTree(tree *ast.Node) *ast.Node {
  243. switch tree.Kind {
  244. case ast.KindAnyOf:
  245. return minimizeTreeAnyOf(tree)
  246. default:
  247. return nil
  248. }
  249. }
  250. // minimizeAnyOf tries to find common children of given node of AnyOf pattern
  251. // it searches for common children from left and from right
  252. // if any common children are found – then it returns new optimized ast tree
  253. // else it returns nil
  254. func minimizeTreeAnyOf(tree *ast.Node) *ast.Node {
  255. if !areOfSameKind(tree.Children, ast.KindPattern) {
  256. return nil
  257. }
  258. commonLeft, commonRight := commonChildren(tree.Children)
  259. commonLeftCount, commonRightCount := len(commonLeft), len(commonRight)
  260. if commonLeftCount == 0 && commonRightCount == 0 { // there are no common parts
  261. return nil
  262. }
  263. var result []*ast.Node
  264. if commonLeftCount > 0 {
  265. result = append(result, ast.NewNode(ast.KindPattern, nil, commonLeft...))
  266. }
  267. var anyOf []*ast.Node
  268. for _, child := range tree.Children {
  269. reuse := child.Children[commonLeftCount : len(child.Children)-commonRightCount]
  270. var node *ast.Node
  271. if len(reuse) == 0 {
  272. // this pattern is completely reduced by commonLeft and commonRight patterns
  273. // so it become nothing
  274. node = ast.NewNode(ast.KindNothing, nil)
  275. } else {
  276. node = ast.NewNode(ast.KindPattern, nil, reuse...)
  277. }
  278. anyOf = appendIfUnique(anyOf, node)
  279. }
  280. switch {
  281. case len(anyOf) == 1 && anyOf[0].Kind != ast.KindNothing:
  282. result = append(result, anyOf[0])
  283. case len(anyOf) > 1:
  284. result = append(result, ast.NewNode(ast.KindAnyOf, nil, anyOf...))
  285. }
  286. if commonRightCount > 0 {
  287. result = append(result, ast.NewNode(ast.KindPattern, nil, commonRight...))
  288. }
  289. return ast.NewNode(ast.KindPattern, nil, result...)
  290. }
  291. func commonChildren(nodes []*ast.Node) (commonLeft, commonRight []*ast.Node) {
  292. if len(nodes) <= 1 {
  293. return
  294. }
  295. // find node that has least number of children
  296. idx := leastChildren(nodes)
  297. if idx == -1 {
  298. return
  299. }
  300. tree := nodes[idx]
  301. treeLength := len(tree.Children)
  302. // allocate max able size for rightCommon slice
  303. // to get ability insert elements in reverse order (from end to start)
  304. // without sorting
  305. commonRight = make([]*ast.Node, treeLength)
  306. lastRight := treeLength // will use this to get results as commonRight[lastRight:]
  307. var (
  308. breakLeft bool
  309. breakRight bool
  310. commonTotal int
  311. )
  312. for i, j := 0, treeLength-1; commonTotal < treeLength && j >= 0 && !(breakLeft && breakRight); i, j = i+1, j-1 {
  313. treeLeft := tree.Children[i]
  314. treeRight := tree.Children[j]
  315. for k := 0; k < len(nodes) && !(breakLeft && breakRight); k++ {
  316. // skip least children node
  317. if k == idx {
  318. continue
  319. }
  320. restLeft := nodes[k].Children[i]
  321. restRight := nodes[k].Children[j+len(nodes[k].Children)-treeLength]
  322. breakLeft = breakLeft || !treeLeft.Equal(restLeft)
  323. // disable searching for right common parts, if left part is already overlapping
  324. breakRight = breakRight || (!breakLeft && j <= i)
  325. breakRight = breakRight || !treeRight.Equal(restRight)
  326. }
  327. if !breakLeft {
  328. commonTotal++
  329. commonLeft = append(commonLeft, treeLeft)
  330. }
  331. if !breakRight {
  332. commonTotal++
  333. lastRight = j
  334. commonRight[j] = treeRight
  335. }
  336. }
  337. commonRight = commonRight[lastRight:]
  338. return
  339. }
  340. func appendIfUnique(target []*ast.Node, val *ast.Node) []*ast.Node {
  341. for _, n := range target {
  342. if reflect.DeepEqual(n, val) {
  343. return target
  344. }
  345. }
  346. return append(target, val)
  347. }
  348. func areOfSameKind(nodes []*ast.Node, kind ast.Kind) bool {
  349. for _, n := range nodes {
  350. if n.Kind != kind {
  351. return false
  352. }
  353. }
  354. return true
  355. }
  356. func leastChildren(nodes []*ast.Node) int {
  357. min := -1
  358. idx := -1
  359. for i, n := range nodes {
  360. if idx == -1 || (len(n.Children) < min) {
  361. min = len(n.Children)
  362. idx = i
  363. }
  364. }
  365. return idx
  366. }
  367. func compileTreeChildren(tree *ast.Node, sep []rune) ([]match.Matcher, error) {
  368. var matchers []match.Matcher
  369. for _, desc := range tree.Children {
  370. m, err := compile(desc, sep)
  371. if err != nil {
  372. return nil, err
  373. }
  374. matchers = append(matchers, optimizeMatcher(m))
  375. }
  376. return matchers, nil
  377. }
  378. func compile(tree *ast.Node, sep []rune) (m match.Matcher, err error) {
  379. switch tree.Kind {
  380. case ast.KindAnyOf:
  381. // todo this could be faster on pattern_alternatives_combine_lite (see glob_test.go)
  382. if n := minimizeTree(tree); n != nil {
  383. return compile(n, sep)
  384. }
  385. matchers, err := compileTreeChildren(tree, sep)
  386. if err != nil {
  387. return nil, err
  388. }
  389. return match.NewAnyOf(matchers...), nil
  390. case ast.KindPattern:
  391. if len(tree.Children) == 0 {
  392. return match.NewNothing(), nil
  393. }
  394. matchers, err := compileTreeChildren(tree, sep)
  395. if err != nil {
  396. return nil, err
  397. }
  398. m, err = compileMatchers(minimizeMatchers(matchers))
  399. if err != nil {
  400. return nil, err
  401. }
  402. case ast.KindAny:
  403. m = match.NewAny(sep)
  404. case ast.KindSuper:
  405. m = match.NewSuper()
  406. case ast.KindSingle:
  407. m = match.NewSingle(sep)
  408. case ast.KindNothing:
  409. m = match.NewNothing()
  410. case ast.KindList:
  411. l := tree.Value.(ast.List)
  412. m = match.NewList([]rune(l.Chars), l.Not)
  413. case ast.KindRange:
  414. r := tree.Value.(ast.Range)
  415. m = match.NewRange(r.Lo, r.Hi, r.Not)
  416. case ast.KindText:
  417. t := tree.Value.(ast.Text)
  418. m = match.NewText(t.Text)
  419. default:
  420. return nil, fmt.Errorf("could not compile tree: unknown node type")
  421. }
  422. return optimizeMatcher(m), nil
  423. }
  424. func Compile(tree *ast.Node, sep []rune) (match.Matcher, error) {
  425. m, err := compile(tree, sep)
  426. if err != nil {
  427. return nil, err
  428. }
  429. return m, nil
  430. }
上海开阖软件有限公司 沪ICP备12045867号-1