本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

121 lines
3.4KB

  1. // Copyright (c) 2015 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "regexp"
  17. "github.com/blevesearch/bleve/index"
  18. "github.com/blevesearch/bleve/search"
  19. )
  20. // NewRegexpStringSearcher is similar to NewRegexpSearcher, but
  21. // additionally optimizes for index readers that handle regexp's.
  22. func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string,
  23. field string, boost float64, options search.SearcherOptions) (
  24. search.Searcher, error) {
  25. ir, ok := indexReader.(index.IndexReaderRegexp)
  26. if !ok {
  27. r, err := regexp.Compile(pattern)
  28. if err != nil {
  29. return nil, err
  30. }
  31. return NewRegexpSearcher(indexReader, r, field, boost, options)
  32. }
  33. fieldDict, err := ir.FieldDictRegexp(field, pattern)
  34. if err != nil {
  35. return nil, err
  36. }
  37. defer func() {
  38. if cerr := fieldDict.Close(); cerr != nil && err == nil {
  39. err = cerr
  40. }
  41. }()
  42. var candidateTerms []string
  43. tfd, err := fieldDict.Next()
  44. for err == nil && tfd != nil {
  45. candidateTerms = append(candidateTerms, tfd.Term)
  46. tfd, err = fieldDict.Next()
  47. }
  48. if err != nil {
  49. return nil, err
  50. }
  51. return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
  52. options, true)
  53. }
  54. // NewRegexpSearcher creates a searcher which will match documents that
  55. // contain terms which match the pattern regexp. The match must be EXACT
  56. // matching the entire term. The provided regexp SHOULD NOT start with ^
  57. // or end with $ as this can intefere with the implementation. Separately,
  58. // matches will be checked to ensure they match the entire term.
  59. func NewRegexpSearcher(indexReader index.IndexReader, pattern index.Regexp,
  60. field string, boost float64, options search.SearcherOptions) (
  61. search.Searcher, error) {
  62. var candidateTerms []string
  63. prefixTerm, complete := pattern.LiteralPrefix()
  64. if complete {
  65. // there is no pattern
  66. candidateTerms = []string{prefixTerm}
  67. } else {
  68. var err error
  69. candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
  70. prefixTerm)
  71. if err != nil {
  72. return nil, err
  73. }
  74. }
  75. return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
  76. options, true)
  77. }
  78. func findRegexpCandidateTerms(indexReader index.IndexReader,
  79. pattern index.Regexp, field, prefixTerm string) (rv []string, err error) {
  80. rv = make([]string, 0)
  81. var fieldDict index.FieldDict
  82. if len(prefixTerm) > 0 {
  83. fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
  84. } else {
  85. fieldDict, err = indexReader.FieldDict(field)
  86. }
  87. defer func() {
  88. if cerr := fieldDict.Close(); cerr != nil && err == nil {
  89. err = cerr
  90. }
  91. }()
  92. // enumerate the terms and check against regexp
  93. tfd, err := fieldDict.Next()
  94. for err == nil && tfd != nil {
  95. matchPos := pattern.FindStringIndex(tfd.Term)
  96. if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
  97. rv = append(rv, tfd.Term)
  98. if tooManyClauses(len(rv)) {
  99. return rv, tooManyClausesErr(len(rv))
  100. }
  101. }
  102. tfd, err = fieldDict.Next()
  103. }
  104. return rv, err
  105. }
上海开阖软件有限公司 沪ICP备12045867号-1