本站源代码
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

377 lines
10KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package upsidedown
  15. import (
  16. "bytes"
  17. "reflect"
  18. "sort"
  19. "sync/atomic"
  20. "github.com/blevesearch/bleve/index"
  21. "github.com/blevesearch/bleve/index/store"
  22. "github.com/blevesearch/bleve/size"
  23. )
  24. var reflectStaticSizeUpsideDownCouchTermFieldReader int
  25. var reflectStaticSizeUpsideDownCouchDocIDReader int
  26. func init() {
  27. var tfr UpsideDownCouchTermFieldReader
  28. reflectStaticSizeUpsideDownCouchTermFieldReader =
  29. int(reflect.TypeOf(tfr).Size())
  30. var cdr UpsideDownCouchDocIDReader
  31. reflectStaticSizeUpsideDownCouchDocIDReader =
  32. int(reflect.TypeOf(cdr).Size())
  33. }
// UpsideDownCouchTermFieldReader iterates over the term-frequency rows stored
// for a single (term, field) pair.
type UpsideDownCouchTermFieldReader struct {
	// count is the value parsed from the term's dictionary row, or 0 when
	// no dictionary row was found at construction time.
	count uint64
	// indexReader gives access to the index (stats, term vector conversion).
	indexReader *IndexReader
	// iterator walks the term-frequency rows; it is nil when the
	// constructor found no dictionary row for the term.
	iterator store.KVIterator
	// term is the term being read.
	term []byte
	// tfrNext is the row that keys/values are parsed into. Next also uses
	// it as a flag: nil means the iterator has not been consumed yet.
	tfrNext *TermFrequencyRow
	// tfrPrealloc is the storage tfrNext is pointed at by Next, so that
	// iterating does not allocate a row per call.
	tfrPrealloc TermFrequencyRow
	// keyBuf is a reusable buffer for the seek key built in Advance.
	keyBuf []byte
	// field is the field identifier the term belongs to.
	field uint16
	// includeTermVectors is passed to parseV when decoding row values.
	includeTermVectors bool
}
  45. func (r *UpsideDownCouchTermFieldReader) Size() int {
  46. sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr +
  47. len(r.term) +
  48. r.tfrPrealloc.Size() +
  49. len(r.keyBuf)
  50. if r.tfrNext != nil {
  51. sizeInBytes += r.tfrNext.Size()
  52. }
  53. return sizeInBytes
  54. }
  55. func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
  56. bufNeeded := termFrequencyRowKeySize(term, nil)
  57. if bufNeeded < dictionaryRowKeySize(term) {
  58. bufNeeded = dictionaryRowKeySize(term)
  59. }
  60. buf := make([]byte, bufNeeded)
  61. bufUsed := dictionaryRowKeyTo(buf, field, term)
  62. val, err := indexReader.kvreader.Get(buf[:bufUsed])
  63. if err != nil {
  64. return nil, err
  65. }
  66. if val == nil {
  67. atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
  68. rv := &UpsideDownCouchTermFieldReader{
  69. count: 0,
  70. term: term,
  71. field: field,
  72. includeTermVectors: includeTermVectors,
  73. }
  74. rv.tfrNext = &rv.tfrPrealloc
  75. return rv, nil
  76. }
  77. count, err := dictionaryRowParseV(val)
  78. if err != nil {
  79. return nil, err
  80. }
  81. bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
  82. it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
  83. atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
  84. return &UpsideDownCouchTermFieldReader{
  85. indexReader: indexReader,
  86. iterator: it,
  87. count: count,
  88. term: term,
  89. field: field,
  90. includeTermVectors: includeTermVectors,
  91. }, nil
  92. }
// Count returns the count captured when the reader was created: the value
// parsed from the term's dictionary row, or 0 if the term had no such row.
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
	return r.count
}
  96. func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
  97. if r.iterator != nil {
  98. // We treat tfrNext also like an initialization flag, which
  99. // tells us whether we need to invoke the underlying
  100. // iterator.Next(). The first time, don't call iterator.Next().
  101. if r.tfrNext != nil {
  102. r.iterator.Next()
  103. } else {
  104. r.tfrNext = &r.tfrPrealloc
  105. }
  106. key, val, valid := r.iterator.Current()
  107. if valid {
  108. tfr := r.tfrNext
  109. err := tfr.parseKDoc(key, r.term)
  110. if err != nil {
  111. return nil, err
  112. }
  113. err = tfr.parseV(val, r.includeTermVectors)
  114. if err != nil {
  115. return nil, err
  116. }
  117. rv := preAlloced
  118. if rv == nil {
  119. rv = &index.TermFieldDoc{}
  120. }
  121. rv.ID = append(rv.ID, tfr.doc...)
  122. rv.Freq = tfr.freq
  123. rv.Norm = float64(tfr.norm)
  124. if tfr.vectors != nil {
  125. rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
  126. }
  127. return rv, nil
  128. }
  129. }
  130. return nil, nil
  131. }
  132. func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
  133. if r.iterator != nil {
  134. if r.tfrNext == nil {
  135. r.tfrNext = &TermFrequencyRow{}
  136. }
  137. tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
  138. r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
  139. if err != nil {
  140. return nil, err
  141. }
  142. r.iterator.Seek(r.keyBuf)
  143. key, val, valid := r.iterator.Current()
  144. if valid {
  145. err := tfr.parseKDoc(key, r.term)
  146. if err != nil {
  147. return nil, err
  148. }
  149. err = tfr.parseV(val, r.includeTermVectors)
  150. if err != nil {
  151. return nil, err
  152. }
  153. rv = preAlloced
  154. if rv == nil {
  155. rv = &index.TermFieldDoc{}
  156. }
  157. rv.ID = append(rv.ID, tfr.doc...)
  158. rv.Freq = tfr.freq
  159. rv.Norm = float64(tfr.norm)
  160. if tfr.vectors != nil {
  161. rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
  162. }
  163. return rv, nil
  164. }
  165. }
  166. return nil, nil
  167. }
  168. func (r *UpsideDownCouchTermFieldReader) Close() error {
  169. if r.indexReader != nil {
  170. atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
  171. }
  172. if r.iterator != nil {
  173. return r.iterator.Close()
  174. }
  175. return nil
  176. }
// UpsideDownCouchDocIDReader iterates over document IDs by walking the
// back-index rows, optionally restricted to a fixed list of IDs.
type UpsideDownCouchDocIDReader struct {
	// indexReader is the reader this iterator was created from.
	indexReader *IndexReader
	// iterator walks the back-index rows.
	iterator store.KVIterator
	// only is the sorted copy of the IDs to restrict to (only-mode).
	only []string
	// onlyPos is the index into only of the ID currently being looked for.
	onlyPos int
	// onlyMode is true when the reader was created with an ID list.
	onlyMode bool
}
  184. func (r *UpsideDownCouchDocIDReader) Size() int {
  185. sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader +
  186. reflectStaticSizeIndexReader + size.SizeOfPtr
  187. for _, entry := range r.only {
  188. sizeInBytes += size.SizeOfString + len(entry)
  189. }
  190. return sizeInBytes
  191. }
  192. func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
  193. startBytes := []byte{0x0}
  194. endBytes := []byte{0xff}
  195. bisr := NewBackIndexRow(startBytes, nil, nil)
  196. bier := NewBackIndexRow(endBytes, nil, nil)
  197. it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
  198. return &UpsideDownCouchDocIDReader{
  199. indexReader: indexReader,
  200. iterator: it,
  201. }, nil
  202. }
  203. func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
  204. // we don't actually own the list of ids, so if before we sort we must copy
  205. idsCopy := make([]string, len(ids))
  206. copy(idsCopy, ids)
  207. // ensure ids are sorted
  208. sort.Strings(idsCopy)
  209. startBytes := []byte{0x0}
  210. if len(idsCopy) > 0 {
  211. startBytes = []byte(idsCopy[0])
  212. }
  213. endBytes := []byte{0xff}
  214. if len(idsCopy) > 0 {
  215. endBytes = incrementBytes([]byte(idsCopy[len(idsCopy)-1]))
  216. }
  217. bisr := NewBackIndexRow(startBytes, nil, nil)
  218. bier := NewBackIndexRow(endBytes, nil, nil)
  219. it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
  220. return &UpsideDownCouchDocIDReader{
  221. indexReader: indexReader,
  222. iterator: it,
  223. only: idsCopy,
  224. onlyMode: true,
  225. }, nil
  226. }
  227. func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
  228. key, val, valid := r.iterator.Current()
  229. if r.onlyMode {
  230. var rv index.IndexInternalID
  231. for valid && r.onlyPos < len(r.only) {
  232. br, err := NewBackIndexRowKV(key, val)
  233. if err != nil {
  234. return nil, err
  235. }
  236. if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
  237. ok := r.nextOnly()
  238. if !ok {
  239. return nil, nil
  240. }
  241. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  242. key, val, valid = r.iterator.Current()
  243. continue
  244. } else {
  245. rv = append([]byte(nil), br.doc...)
  246. break
  247. }
  248. }
  249. if valid && r.onlyPos < len(r.only) {
  250. ok := r.nextOnly()
  251. if ok {
  252. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  253. }
  254. return rv, nil
  255. }
  256. } else {
  257. if valid {
  258. br, err := NewBackIndexRowKV(key, val)
  259. if err != nil {
  260. return nil, err
  261. }
  262. rv := append([]byte(nil), br.doc...)
  263. r.iterator.Next()
  264. return rv, nil
  265. }
  266. }
  267. return nil, nil
  268. }
  269. func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
  270. if r.onlyMode {
  271. r.onlyPos = sort.SearchStrings(r.only, string(docID))
  272. if r.onlyPos >= len(r.only) {
  273. // advanced to key after our last only key
  274. return nil, nil
  275. }
  276. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  277. key, val, valid := r.iterator.Current()
  278. var rv index.IndexInternalID
  279. for valid && r.onlyPos < len(r.only) {
  280. br, err := NewBackIndexRowKV(key, val)
  281. if err != nil {
  282. return nil, err
  283. }
  284. if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
  285. // the only key we seek'd to didn't exist
  286. // now look for the closest key that did exist in only
  287. r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
  288. if r.onlyPos >= len(r.only) {
  289. // advanced to key after our last only key
  290. return nil, nil
  291. }
  292. // now seek to this new only key
  293. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  294. key, val, valid = r.iterator.Current()
  295. continue
  296. } else {
  297. rv = append([]byte(nil), br.doc...)
  298. break
  299. }
  300. }
  301. if valid && r.onlyPos < len(r.only) {
  302. ok := r.nextOnly()
  303. if ok {
  304. r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
  305. }
  306. return rv, nil
  307. }
  308. } else {
  309. bir := NewBackIndexRow(docID, nil, nil)
  310. r.iterator.Seek(bir.Key())
  311. key, val, valid := r.iterator.Current()
  312. if valid {
  313. br, err := NewBackIndexRowKV(key, val)
  314. if err != nil {
  315. return nil, err
  316. }
  317. rv := append([]byte(nil), br.doc...)
  318. r.iterator.Next()
  319. return rv, nil
  320. }
  321. }
  322. return nil, nil
  323. }
// Close closes the underlying iterator and returns its error, if any.
func (r *UpsideDownCouchDocIDReader) Close() error {
	return r.iterator.Close()
}
  327. // move the r.only pos forward one, skipping duplicates
  328. // return true if there is more data, or false if we got to the end of the list
  329. func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
  330. // advance 1 position, until we see a different key
  331. // it's already sorted, so this skips duplicates
  332. start := r.onlyPos
  333. r.onlyPos++
  334. for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
  335. start = r.onlyPos
  336. r.onlyPos++
  337. }
  338. // inidicate if we got to the end of the list
  339. return r.onlyPos < len(r.only)
  340. }
上海开阖软件有限公司 沪ICP备12045867号-1