本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

285 lines
6.7KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "math"
  17. "reflect"
  18. "sort"
  19. "github.com/blevesearch/bleve/index"
  20. "github.com/blevesearch/bleve/search"
  21. "github.com/blevesearch/bleve/search/scorer"
  22. "github.com/blevesearch/bleve/size"
  23. )
  24. var reflectStaticSizeConjunctionSearcher int
  25. func init() {
  26. var cs ConjunctionSearcher
  27. reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size())
  28. }
  29. type ConjunctionSearcher struct {
  30. indexReader index.IndexReader
  31. searchers OrderedSearcherList
  32. queryNorm float64
  33. currs []*search.DocumentMatch
  34. maxIDIdx int
  35. scorer *scorer.ConjunctionQueryScorer
  36. initialized bool
  37. options search.SearcherOptions
  38. }
  39. func NewConjunctionSearcher(indexReader index.IndexReader,
  40. qsearchers []search.Searcher, options search.SearcherOptions) (
  41. search.Searcher, error) {
  42. // build the sorted downstream searchers
  43. searchers := make(OrderedSearcherList, len(qsearchers))
  44. for i, searcher := range qsearchers {
  45. searchers[i] = searcher
  46. }
  47. sort.Sort(searchers)
  48. // attempt the "unadorned" conjunction optimization only when we
  49. // do not need extra information like freq-norm's or term vectors
  50. if len(searchers) > 1 &&
  51. options.Score == "none" && !options.IncludeTermVectors {
  52. rv, err := optimizeCompositeSearcher("conjunction:unadorned",
  53. indexReader, searchers, options)
  54. if err != nil || rv != nil {
  55. return rv, err
  56. }
  57. }
  58. // build our searcher
  59. rv := ConjunctionSearcher{
  60. indexReader: indexReader,
  61. options: options,
  62. searchers: searchers,
  63. currs: make([]*search.DocumentMatch, len(searchers)),
  64. scorer: scorer.NewConjunctionQueryScorer(options),
  65. }
  66. rv.computeQueryNorm()
  67. // attempt push-down conjunction optimization when there's >1 searchers
  68. if len(searchers) > 1 {
  69. rv, err := optimizeCompositeSearcher("conjunction",
  70. indexReader, searchers, options)
  71. if err != nil || rv != nil {
  72. return rv, err
  73. }
  74. }
  75. return &rv, nil
  76. }
  77. func (s *ConjunctionSearcher) Size() int {
  78. sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
  79. s.scorer.Size()
  80. for _, entry := range s.searchers {
  81. sizeInBytes += entry.Size()
  82. }
  83. for _, entry := range s.currs {
  84. if entry != nil {
  85. sizeInBytes += entry.Size()
  86. }
  87. }
  88. return sizeInBytes
  89. }
  90. func (s *ConjunctionSearcher) computeQueryNorm() {
  91. // first calculate sum of squared weights
  92. sumOfSquaredWeights := 0.0
  93. for _, searcher := range s.searchers {
  94. sumOfSquaredWeights += searcher.Weight()
  95. }
  96. // now compute query norm from this
  97. s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
  98. // finally tell all the downstream searchers the norm
  99. for _, searcher := range s.searchers {
  100. searcher.SetQueryNorm(s.queryNorm)
  101. }
  102. }
  103. func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
  104. var err error
  105. // get all searchers pointing at their first match
  106. for i, searcher := range s.searchers {
  107. if s.currs[i] != nil {
  108. ctx.DocumentMatchPool.Put(s.currs[i])
  109. }
  110. s.currs[i], err = searcher.Next(ctx)
  111. if err != nil {
  112. return err
  113. }
  114. }
  115. s.initialized = true
  116. return nil
  117. }
  118. func (s *ConjunctionSearcher) Weight() float64 {
  119. var rv float64
  120. for _, searcher := range s.searchers {
  121. rv += searcher.Weight()
  122. }
  123. return rv
  124. }
  125. func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
  126. for _, searcher := range s.searchers {
  127. searcher.SetQueryNorm(qnorm)
  128. }
  129. }
  130. func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
  131. if !s.initialized {
  132. err := s.initSearchers(ctx)
  133. if err != nil {
  134. return nil, err
  135. }
  136. }
  137. var rv *search.DocumentMatch
  138. var err error
  139. OUTER:
  140. for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
  141. maxID := s.currs[s.maxIDIdx].IndexInternalID
  142. i := 0
  143. for i < len(s.currs) {
  144. if s.currs[i] == nil {
  145. return nil, nil
  146. }
  147. if i == s.maxIDIdx {
  148. i++
  149. continue
  150. }
  151. cmp := maxID.Compare(s.currs[i].IndexInternalID)
  152. if cmp == 0 {
  153. i++
  154. continue
  155. }
  156. if cmp < 0 {
  157. // maxID < currs[i], so we found a new maxIDIdx
  158. s.maxIDIdx = i
  159. // advance the positions where [0 <= x < i], since we
  160. // know they were equal to the former max entry
  161. maxID = s.currs[s.maxIDIdx].IndexInternalID
  162. for x := 0; x < i; x++ {
  163. err = s.advanceChild(ctx, x, maxID)
  164. if err != nil {
  165. return nil, err
  166. }
  167. }
  168. continue OUTER
  169. }
  170. // maxID > currs[i], so need to advance searchers[i]
  171. err = s.advanceChild(ctx, i, maxID)
  172. if err != nil {
  173. return nil, err
  174. }
  175. // don't bump i, so that we'll examine the just-advanced
  176. // currs[i] again
  177. }
  178. // if we get here, a doc matched all readers, so score and add it
  179. rv = s.scorer.Score(ctx, s.currs)
  180. // we know all the searchers are pointing at the same thing
  181. // so they all need to be bumped
  182. for i, searcher := range s.searchers {
  183. if s.currs[i] != rv {
  184. ctx.DocumentMatchPool.Put(s.currs[i])
  185. }
  186. s.currs[i], err = searcher.Next(ctx)
  187. if err != nil {
  188. return nil, err
  189. }
  190. }
  191. // don't continue now, wait for the next call to Next()
  192. break
  193. }
  194. return rv, nil
  195. }
  196. func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
  197. if !s.initialized {
  198. err := s.initSearchers(ctx)
  199. if err != nil {
  200. return nil, err
  201. }
  202. }
  203. for i := range s.searchers {
  204. if s.currs[i] != nil && s.currs[i].IndexInternalID.Compare(ID) >= 0 {
  205. continue
  206. }
  207. err := s.advanceChild(ctx, i, ID)
  208. if err != nil {
  209. return nil, err
  210. }
  211. }
  212. return s.Next(ctx)
  213. }
  214. func (s *ConjunctionSearcher) advanceChild(ctx *search.SearchContext, i int, ID index.IndexInternalID) (err error) {
  215. if s.currs[i] != nil {
  216. ctx.DocumentMatchPool.Put(s.currs[i])
  217. }
  218. s.currs[i], err = s.searchers[i].Advance(ctx, ID)
  219. return err
  220. }
  221. func (s *ConjunctionSearcher) Count() uint64 {
  222. // for now return a worst case
  223. var sum uint64
  224. for _, searcher := range s.searchers {
  225. sum += searcher.Count()
  226. }
  227. return sum
  228. }
  229. func (s *ConjunctionSearcher) Close() (rv error) {
  230. for _, searcher := range s.searchers {
  231. err := searcher.Close()
  232. if err != nil && rv == nil {
  233. rv = err
  234. }
  235. }
  236. return rv
  237. }
  238. func (s *ConjunctionSearcher) Min() int {
  239. return 0
  240. }
  241. func (s *ConjunctionSearcher) DocumentMatchPoolSize() int {
  242. rv := len(s.currs)
  243. for _, s := range s.searchers {
  244. rv += s.DocumentMatchPoolSize()
  245. }
  246. return rv
  247. }
上海开阖软件有限公司 沪ICP备12045867号-1