本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

439 lines
11KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package searcher
  15. import (
  16. "math"
  17. "reflect"
  18. "github.com/blevesearch/bleve/index"
  19. "github.com/blevesearch/bleve/search"
  20. "github.com/blevesearch/bleve/search/scorer"
  21. "github.com/blevesearch/bleve/size"
  22. )
  23. var reflectStaticSizeBooleanSearcher int
  24. func init() {
  25. var bs BooleanSearcher
  26. reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size())
  27. }
  28. type BooleanSearcher struct {
  29. indexReader index.IndexReader
  30. mustSearcher search.Searcher
  31. shouldSearcher search.Searcher
  32. mustNotSearcher search.Searcher
  33. queryNorm float64
  34. currMust *search.DocumentMatch
  35. currShould *search.DocumentMatch
  36. currMustNot *search.DocumentMatch
  37. currentID index.IndexInternalID
  38. min uint64
  39. scorer *scorer.ConjunctionQueryScorer
  40. matches []*search.DocumentMatch
  41. initialized bool
  42. }
  43. func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
  44. // build our searcher
  45. rv := BooleanSearcher{
  46. indexReader: indexReader,
  47. mustSearcher: mustSearcher,
  48. shouldSearcher: shouldSearcher,
  49. mustNotSearcher: mustNotSearcher,
  50. scorer: scorer.NewConjunctionQueryScorer(options),
  51. matches: make([]*search.DocumentMatch, 2),
  52. }
  53. rv.computeQueryNorm()
  54. return &rv, nil
  55. }
  56. func (s *BooleanSearcher) Size() int {
  57. sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr
  58. if s.mustSearcher != nil {
  59. sizeInBytes += s.mustSearcher.Size()
  60. }
  61. if s.shouldSearcher != nil {
  62. sizeInBytes += s.shouldSearcher.Size()
  63. }
  64. if s.mustNotSearcher != nil {
  65. sizeInBytes += s.mustNotSearcher.Size()
  66. }
  67. sizeInBytes += s.scorer.Size()
  68. for _, entry := range s.matches {
  69. if entry != nil {
  70. sizeInBytes += entry.Size()
  71. }
  72. }
  73. return sizeInBytes
  74. }
  75. func (s *BooleanSearcher) computeQueryNorm() {
  76. // first calculate sum of squared weights
  77. sumOfSquaredWeights := 0.0
  78. if s.mustSearcher != nil {
  79. sumOfSquaredWeights += s.mustSearcher.Weight()
  80. }
  81. if s.shouldSearcher != nil {
  82. sumOfSquaredWeights += s.shouldSearcher.Weight()
  83. }
  84. // now compute query norm from this
  85. s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights)
  86. // finally tell all the downstream searchers the norm
  87. if s.mustSearcher != nil {
  88. s.mustSearcher.SetQueryNorm(s.queryNorm)
  89. }
  90. if s.shouldSearcher != nil {
  91. s.shouldSearcher.SetQueryNorm(s.queryNorm)
  92. }
  93. }
  94. func (s *BooleanSearcher) initSearchers(ctx *search.SearchContext) error {
  95. var err error
  96. // get all searchers pointing at their first match
  97. if s.mustSearcher != nil {
  98. if s.currMust != nil {
  99. ctx.DocumentMatchPool.Put(s.currMust)
  100. }
  101. s.currMust, err = s.mustSearcher.Next(ctx)
  102. if err != nil {
  103. return err
  104. }
  105. }
  106. if s.shouldSearcher != nil {
  107. if s.currShould != nil {
  108. ctx.DocumentMatchPool.Put(s.currShould)
  109. }
  110. s.currShould, err = s.shouldSearcher.Next(ctx)
  111. if err != nil {
  112. return err
  113. }
  114. }
  115. if s.mustNotSearcher != nil {
  116. if s.currMustNot != nil {
  117. ctx.DocumentMatchPool.Put(s.currMustNot)
  118. }
  119. s.currMustNot, err = s.mustNotSearcher.Next(ctx)
  120. if err != nil {
  121. return err
  122. }
  123. }
  124. if s.mustSearcher != nil && s.currMust != nil {
  125. s.currentID = s.currMust.IndexInternalID
  126. } else if s.mustSearcher == nil && s.currShould != nil {
  127. s.currentID = s.currShould.IndexInternalID
  128. } else {
  129. s.currentID = nil
  130. }
  131. s.initialized = true
  132. return nil
  133. }
  134. func (s *BooleanSearcher) advanceNextMust(ctx *search.SearchContext, skipReturn *search.DocumentMatch) error {
  135. var err error
  136. if s.mustSearcher != nil {
  137. if s.currMust != skipReturn {
  138. ctx.DocumentMatchPool.Put(s.currMust)
  139. }
  140. s.currMust, err = s.mustSearcher.Next(ctx)
  141. if err != nil {
  142. return err
  143. }
  144. } else {
  145. if s.currShould != skipReturn {
  146. ctx.DocumentMatchPool.Put(s.currShould)
  147. }
  148. s.currShould, err = s.shouldSearcher.Next(ctx)
  149. if err != nil {
  150. return err
  151. }
  152. }
  153. if s.mustSearcher != nil && s.currMust != nil {
  154. s.currentID = s.currMust.IndexInternalID
  155. } else if s.mustSearcher == nil && s.currShould != nil {
  156. s.currentID = s.currShould.IndexInternalID
  157. } else {
  158. s.currentID = nil
  159. }
  160. return nil
  161. }
  162. func (s *BooleanSearcher) Weight() float64 {
  163. var rv float64
  164. if s.mustSearcher != nil {
  165. rv += s.mustSearcher.Weight()
  166. }
  167. if s.shouldSearcher != nil {
  168. rv += s.shouldSearcher.Weight()
  169. }
  170. return rv
  171. }
  172. func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
  173. if s.mustSearcher != nil {
  174. s.mustSearcher.SetQueryNorm(qnorm)
  175. }
  176. if s.shouldSearcher != nil {
  177. s.shouldSearcher.SetQueryNorm(qnorm)
  178. }
  179. }
  180. func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
  181. if !s.initialized {
  182. err := s.initSearchers(ctx)
  183. if err != nil {
  184. return nil, err
  185. }
  186. }
  187. var err error
  188. var rv *search.DocumentMatch
  189. for s.currentID != nil {
  190. if s.currMustNot != nil {
  191. cmp := s.currMustNot.IndexInternalID.Compare(s.currentID)
  192. if cmp < 0 {
  193. ctx.DocumentMatchPool.Put(s.currMustNot)
  194. // advance must not searcher to our candidate entry
  195. s.currMustNot, err = s.mustNotSearcher.Advance(ctx, s.currentID)
  196. if err != nil {
  197. return nil, err
  198. }
  199. if s.currMustNot != nil && s.currMustNot.IndexInternalID.Equals(s.currentID) {
  200. // the candidate is excluded
  201. err = s.advanceNextMust(ctx, nil)
  202. if err != nil {
  203. return nil, err
  204. }
  205. continue
  206. }
  207. } else if cmp == 0 {
  208. // the candidate is excluded
  209. err = s.advanceNextMust(ctx, nil)
  210. if err != nil {
  211. return nil, err
  212. }
  213. continue
  214. }
  215. }
  216. shouldCmpOrNil := 1 // NOTE: shouldCmp will also be 1 when currShould == nil.
  217. if s.currShould != nil {
  218. shouldCmpOrNil = s.currShould.IndexInternalID.Compare(s.currentID)
  219. }
  220. if shouldCmpOrNil < 0 {
  221. ctx.DocumentMatchPool.Put(s.currShould)
  222. // advance should searcher to our candidate entry
  223. s.currShould, err = s.shouldSearcher.Advance(ctx, s.currentID)
  224. if err != nil {
  225. return nil, err
  226. }
  227. if s.currShould != nil && s.currShould.IndexInternalID.Equals(s.currentID) {
  228. // score bonus matches should
  229. var cons []*search.DocumentMatch
  230. if s.currMust != nil {
  231. cons = s.matches
  232. cons[0] = s.currMust
  233. cons[1] = s.currShould
  234. } else {
  235. cons = s.matches[0:1]
  236. cons[0] = s.currShould
  237. }
  238. rv = s.scorer.Score(ctx, cons)
  239. err = s.advanceNextMust(ctx, rv)
  240. if err != nil {
  241. return nil, err
  242. }
  243. break
  244. } else if s.shouldSearcher.Min() == 0 {
  245. // match is OK anyway
  246. cons := s.matches[0:1]
  247. cons[0] = s.currMust
  248. rv = s.scorer.Score(ctx, cons)
  249. err = s.advanceNextMust(ctx, rv)
  250. if err != nil {
  251. return nil, err
  252. }
  253. break
  254. }
  255. } else if shouldCmpOrNil == 0 {
  256. // score bonus matches should
  257. var cons []*search.DocumentMatch
  258. if s.currMust != nil {
  259. cons = s.matches
  260. cons[0] = s.currMust
  261. cons[1] = s.currShould
  262. } else {
  263. cons = s.matches[0:1]
  264. cons[0] = s.currShould
  265. }
  266. rv = s.scorer.Score(ctx, cons)
  267. err = s.advanceNextMust(ctx, rv)
  268. if err != nil {
  269. return nil, err
  270. }
  271. break
  272. } else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
  273. // match is OK anyway
  274. cons := s.matches[0:1]
  275. cons[0] = s.currMust
  276. rv = s.scorer.Score(ctx, cons)
  277. err = s.advanceNextMust(ctx, rv)
  278. if err != nil {
  279. return nil, err
  280. }
  281. break
  282. }
  283. err = s.advanceNextMust(ctx, nil)
  284. if err != nil {
  285. return nil, err
  286. }
  287. }
  288. return rv, nil
  289. }
  290. func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
  291. if !s.initialized {
  292. err := s.initSearchers(ctx)
  293. if err != nil {
  294. return nil, err
  295. }
  296. }
  297. // Advance the searchers only if the currentID cursor is trailing the lookup ID,
  298. // additionally if the mustNotSearcher has been initialized, ensure that the
  299. // cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
  300. // currentID) is trailing the lookup ID as well - for in the case where currentID
  301. // is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
  302. // advance the currentID or the currMustNot cursors.
  303. if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
  304. (s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
  305. var err error
  306. if s.mustSearcher != nil {
  307. if s.currMust != nil {
  308. ctx.DocumentMatchPool.Put(s.currMust)
  309. }
  310. s.currMust, err = s.mustSearcher.Advance(ctx, ID)
  311. if err != nil {
  312. return nil, err
  313. }
  314. }
  315. if s.shouldSearcher != nil {
  316. if s.currShould != nil {
  317. ctx.DocumentMatchPool.Put(s.currShould)
  318. }
  319. s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
  320. if err != nil {
  321. return nil, err
  322. }
  323. }
  324. if s.mustNotSearcher != nil {
  325. if s.currMustNot != nil {
  326. ctx.DocumentMatchPool.Put(s.currMustNot)
  327. }
  328. s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
  329. if err != nil {
  330. return nil, err
  331. }
  332. }
  333. if s.mustSearcher != nil && s.currMust != nil {
  334. s.currentID = s.currMust.IndexInternalID
  335. } else if s.mustSearcher == nil && s.currShould != nil {
  336. s.currentID = s.currShould.IndexInternalID
  337. } else {
  338. s.currentID = nil
  339. }
  340. }
  341. return s.Next(ctx)
  342. }
  343. func (s *BooleanSearcher) Count() uint64 {
  344. // for now return a worst case
  345. var sum uint64
  346. if s.mustSearcher != nil {
  347. sum += s.mustSearcher.Count()
  348. }
  349. if s.shouldSearcher != nil {
  350. sum += s.shouldSearcher.Count()
  351. }
  352. return sum
  353. }
  354. func (s *BooleanSearcher) Close() error {
  355. var err0, err1, err2 error
  356. if s.mustSearcher != nil {
  357. err0 = s.mustSearcher.Close()
  358. }
  359. if s.shouldSearcher != nil {
  360. err1 = s.shouldSearcher.Close()
  361. }
  362. if s.mustNotSearcher != nil {
  363. err2 = s.mustNotSearcher.Close()
  364. }
  365. if err0 != nil {
  366. return err0
  367. }
  368. if err1 != nil {
  369. return err1
  370. }
  371. if err2 != nil {
  372. return err2
  373. }
  374. return nil
  375. }
  376. func (s *BooleanSearcher) Min() int {
  377. return 0
  378. }
  379. func (s *BooleanSearcher) DocumentMatchPoolSize() int {
  380. rv := 3
  381. if s.mustSearcher != nil {
  382. rv += s.mustSearcher.DocumentMatchPoolSize()
  383. }
  384. if s.shouldSearcher != nil {
  385. rv += s.shouldSearcher.DocumentMatchPoolSize()
  386. }
  387. if s.mustNotSearcher != nil {
  388. rv += s.mustNotSearcher.DocumentMatchPoolSize()
  389. }
  390. return rv
  391. }
上海开阖软件有限公司 沪ICP备12045867号-1