本站源代码
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1142 lines
27KB

  1. // Copyright (c) 2014 Couchbase, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package upsidedown
  15. import (
  16. "bytes"
  17. "encoding/binary"
  18. "fmt"
  19. "io"
  20. "math"
  21. "reflect"
  22. "github.com/blevesearch/bleve/size"
  23. "github.com/golang/protobuf/proto"
  24. )
  25. var reflectStaticSizeTermFrequencyRow int
  26. var reflectStaticSizeTermVector int
  27. func init() {
  28. var tfr TermFrequencyRow
  29. reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size())
  30. var tv TermVector
  31. reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size())
  32. }
  // ByteSeparator is the byte written after variable-length segments (field
  // names, terms, document ids) inside encoded keys and values.
  const ByteSeparator byte = 0xff

  // UpsideDownCouchRowStream is a channel carrying rows.
  type UpsideDownCouchRowStream chan UpsideDownCouchRow

  // UpsideDownCouchRow is implemented by every row type in this package.
  //
  // Key and Value return the encoded key/value as byte slices. KeySize and
  // ValueSize report how large a buffer KeyTo/ValueTo need, and KeyTo/ValueTo
  // encode into a caller-supplied buffer, returning the number of bytes
  // actually written.
  type UpsideDownCouchRow interface {
  	Key() []byte
  	KeySize() int
  	KeyTo([]byte) (int, error)

  	Value() []byte
  	ValueSize() int
  	ValueTo([]byte) (int, error)
  }
  43. func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
  44. if len(key) > 0 {
  45. switch key[0] {
  46. case 'v':
  47. return NewVersionRowKV(key, value)
  48. case 'f':
  49. return NewFieldRowKV(key, value)
  50. case 'd':
  51. return NewDictionaryRowKV(key, value)
  52. case 't':
  53. return NewTermFrequencyRowKV(key, value)
  54. case 'b':
  55. return NewBackIndexRowKV(key, value)
  56. case 's':
  57. return NewStoredRowKV(key, value)
  58. case 'i':
  59. return NewInternalRowKV(key, value)
  60. }
  61. return nil, fmt.Errorf("Unknown field type '%s'", string(key[0]))
  62. }
  63. return nil, fmt.Errorf("Invalid empty key")
  64. }
  // VERSION

  // VersionRow is the row stored under the one-byte key 'v'. Its value is a
  // single byte holding the version number.
  type VersionRow struct {
  	version uint8
  }

  // NewVersionRow returns a VersionRow carrying the given version.
  func NewVersionRow(version uint8) *VersionRow {
  	return &VersionRow{version: version}
  }

  // NewVersionRowKV decodes a VersionRow from its stored form. The key is not
  // inspected; the version is read from the first byte of value, and an error
  // is returned when value is empty.
  func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
  	var version uint8
  	if err := binary.Read(bytes.NewReader(value), binary.LittleEndian, &version); err != nil {
  		return nil, err
  	}
  	return NewVersionRow(version), nil
  }

  // KeySize returns the encoded key length, which is always 1.
  func (v *VersionRow) KeySize() int {
  	return 1
  }

  // KeyTo writes the key byte 'v' into buf and returns the number of bytes
  // written. buf must hold at least KeySize bytes.
  func (v *VersionRow) KeyTo(buf []byte) (int, error) {
  	buf[0] = 'v'
  	return 1, nil
  }

  // Key returns the encoded key in a newly allocated slice.
  func (v *VersionRow) Key() []byte {
  	key := make([]byte, v.KeySize())
  	n, _ := v.KeyTo(key) // KeyTo never returns an error
  	return key[:n]
  }

  // ValueSize returns the encoded value length, which is always 1.
  func (v *VersionRow) ValueSize() int {
  	return 1
  }

  // ValueTo writes the version byte into buf and returns the number of bytes
  // written. buf must hold at least ValueSize bytes.
  func (v *VersionRow) ValueTo(buf []byte) (int, error) {
  	buf[0] = v.version
  	return 1, nil
  }

  // Value returns the encoded value in a newly allocated slice.
  func (v *VersionRow) Value() []byte {
  	val := make([]byte, v.ValueSize())
  	n, _ := v.ValueTo(val) // ValueTo never returns an error
  	return val[:n]
  }

  // String renders the row for debugging.
  func (v *VersionRow) String() string {
  	return fmt.Sprintf("Version: %d", v.version)
  }
  // INTERNAL STORAGE

  // InternalRow stores an arbitrary key/value pair. The encoded key is the
  // byte 'i' followed by the raw key; the encoded value is the raw value.
  type InternalRow struct {
  	key []byte
  	val []byte
  }

  // Key returns the encoded key ('i' + key) in a newly allocated slice.
  func (i *InternalRow) Key() []byte {
  	buf := make([]byte, i.KeySize())
  	size, _ := i.KeyTo(buf) // KeyTo never returns an error
  	return buf[:size]
  }

  // KeySize returns the length of the encoded key.
  func (i *InternalRow) KeySize() int {
  	return len(i.key) + 1
  }

  // KeyTo writes the encoded key into buf and returns the number of bytes
  // written. buf must hold at least KeySize bytes.
  func (i *InternalRow) KeyTo(buf []byte) (int, error) {
  	buf[0] = 'i'
  	actual := copy(buf[1:], i.key)
  	return 1 + actual, nil
  }

  // Value returns the stored value. The returned slice is the row's own
  // slice, not a copy.
  func (i *InternalRow) Value() []byte {
  	return i.val
  }

  // ValueSize returns the length of the stored value.
  func (i *InternalRow) ValueSize() int {
  	return len(i.val)
  }

  // ValueTo copies the stored value into buf and returns the number of bytes
  // copied.
  func (i *InternalRow) ValueTo(buf []byte) (int, error) {
  	actual := copy(buf, i.val)
  	return actual, nil
  }

  // String renders the row for debugging, showing key and value both as text
  // and as hex.
  func (i *InternalRow) String() string {
  	return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
  }

  // NewInternalRow returns an InternalRow holding key and val. The slices are
  // retained, not copied.
  func NewInternalRow(key, val []byte) *InternalRow {
  	return &InternalRow{
  		key: key,
  		val: val,
  	}
  }

  // NewInternalRowKV decodes an InternalRow from its stored form. key must
  // start with the one-byte row type, which is stripped; the remainder of key
  // and the whole of value are retained without copying. An empty key yields
  // an error instead of a slice-bounds panic.
  func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
  	if len(key) < 1 {
  		return nil, fmt.Errorf("invalid internal row key, missing type byte")
  	}
  	return &InternalRow{
  		key: key[1:],
  		val: value,
  	}, nil
  }
  149. // FIELD definition
  150. type FieldRow struct {
  151. index uint16
  152. name string
  153. }
  154. func (f *FieldRow) Key() []byte {
  155. buf := make([]byte, f.KeySize())
  156. size, _ := f.KeyTo(buf)
  157. return buf[:size]
  158. }
  159. func (f *FieldRow) KeySize() int {
  160. return 3
  161. }
  162. func (f *FieldRow) KeyTo(buf []byte) (int, error) {
  163. buf[0] = 'f'
  164. binary.LittleEndian.PutUint16(buf[1:3], f.index)
  165. return 3, nil
  166. }
  167. func (f *FieldRow) Value() []byte {
  168. return append([]byte(f.name), ByteSeparator)
  169. }
  170. func (f *FieldRow) ValueSize() int {
  171. return len(f.name) + 1
  172. }
  173. func (f *FieldRow) ValueTo(buf []byte) (int, error) {
  174. size := copy(buf, f.name)
  175. buf[size] = ByteSeparator
  176. return size + 1, nil
  177. }
  178. func (f *FieldRow) String() string {
  179. return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
  180. }
  181. func NewFieldRow(index uint16, name string) *FieldRow {
  182. return &FieldRow{
  183. index: index,
  184. name: name,
  185. }
  186. }
  187. func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
  188. rv := FieldRow{}
  189. buf := bytes.NewBuffer(key)
  190. _, err := buf.ReadByte() // type
  191. if err != nil {
  192. return nil, err
  193. }
  194. err = binary.Read(buf, binary.LittleEndian, &rv.index)
  195. if err != nil {
  196. return nil, err
  197. }
  198. buf = bytes.NewBuffer(value)
  199. rv.name, err = buf.ReadString(ByteSeparator)
  200. if err != nil {
  201. return nil, err
  202. }
  203. rv.name = rv.name[:len(rv.name)-1] // trim off separator byte
  204. return &rv, nil
  205. }
  // DICTIONARY

  // DictionaryRowMaxValueSize is the largest number of bytes an encoded
  // DictionaryRow value can occupy: one uvarint-encoded uint64.
  const DictionaryRowMaxValueSize = binary.MaxVarintLen64

  // DictionaryRow stores a count for a (field, term) pair. The encoded key is
  // 'd', the field as a little-endian uint16, then the term bytes; the
  // encoded value is the count as a uvarint.
  type DictionaryRow struct {
  	term  []byte
  	count uint64
  	field uint16
  }

  // Key returns the encoded key in a newly allocated slice.
  func (dr *DictionaryRow) Key() []byte {
  	buf := make([]byte, dr.KeySize())
  	size, _ := dr.KeyTo(buf) // KeyTo never returns an error
  	return buf[:size]
  }

  // KeySize returns the length of the encoded key.
  func (dr *DictionaryRow) KeySize() int {
  	return dictionaryRowKeySize(dr.term)
  }

  // dictionaryRowKeySize returns the encoded key length for term: one type
  // byte, two field bytes and the term itself.
  func dictionaryRowKeySize(term []byte) int {
  	return len(term) + 3
  }

  // KeyTo writes the encoded key into buf and returns the number of bytes
  // written. buf must hold at least KeySize bytes.
  func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
  	return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
  }

  // dictionaryRowKeyTo encodes a dictionary key for field and term into buf
  // and returns the number of bytes written.
  func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
  	buf[0] = 'd'
  	binary.LittleEndian.PutUint16(buf[1:3], field)
  	size := copy(buf[3:], term)
  	return size + 3
  }

  // Value returns the encoded value in a newly allocated slice, trimmed to
  // the bytes actually used by the uvarint.
  func (dr *DictionaryRow) Value() []byte {
  	buf := make([]byte, dr.ValueSize())
  	size, _ := dr.ValueTo(buf) // ValueTo never returns an error
  	return buf[:size]
  }

  // ValueSize returns the maximum encoded value length. The number of bytes
  // actually written by ValueTo may be smaller.
  func (dr *DictionaryRow) ValueSize() int {
  	return DictionaryRowMaxValueSize
  }

  // ValueTo writes the count as a uvarint into buf and returns the number of
  // bytes written. buf must hold at least ValueSize bytes.
  func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
  	used := binary.PutUvarint(buf, dr.count)
  	return used, nil
  }

  // String renders the row for debugging.
  func (dr *DictionaryRow) String() string {
  	return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
  }

  // NewDictionaryRow returns a DictionaryRow for the given term, field and
  // count. The term slice is retained, not copied.
  func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
  	return &DictionaryRow{
  		term:  term,
  		field: field,
  		count: count,
  	}
  }

  // NewDictionaryRowKV decodes a DictionaryRow from its stored key and value.
  func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
  	rv, err := NewDictionaryRowK(key)
  	if err != nil {
  		return nil, err
  	}

  	err = rv.parseDictionaryV(value)
  	if err != nil {
  		return nil, err
  	}
  	return rv, nil
  }

  // NewDictionaryRowK decodes only the key portion of a DictionaryRow; the
  // count is left at zero.
  func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
  	rv := &DictionaryRow{}
  	err := rv.parseDictionaryK(key)
  	if err != nil {
  		return nil, err
  	}
  	return rv, nil
  }

  // parseDictionaryK fills in field and term from an encoded key. The term is
  // copied out of key, reusing the row's existing term buffer when it has
  // one. A key shorter than the 3-byte header is reported as an error rather
  // than causing a slice-bounds panic.
  func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
  	if len(key) < 3 {
  		return fmt.Errorf("invalid dictionary key, no valid field")
  	}
  	dr.field = binary.LittleEndian.Uint16(key[1:3])
  	dr.term = append(dr.term[:0], key[3:]...)
  	return nil
  }

  // parseDictionaryV fills in count from an encoded value.
  func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
  	count, err := dictionaryRowParseV(value)
  	if err != nil {
  		return err
  	}
  	dr.count = count
  	return nil
  }

  // dictionaryRowParseV decodes the uvarint count at the start of value. It
  // returns an error when value is empty or the uvarint overflows 64 bits.
  func dictionaryRowParseV(value []byte) (uint64, error) {
  	count, nread := binary.Uvarint(value)
  	if nread <= 0 {
  		return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
  	}
  	return count, nil
  }
  297. // TERM FIELD FREQUENCY
  298. type TermVector struct {
  299. field uint16
  300. arrayPositions []uint64
  301. pos uint64
  302. start uint64
  303. end uint64
  304. }
  305. func (tv *TermVector) Size() int {
  306. return reflectStaticSizeTermVector + size.SizeOfPtr +
  307. len(tv.arrayPositions)*size.SizeOfUint64
  308. }
  309. func (tv *TermVector) String() string {
  310. return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
  311. }
  312. type TermFrequencyRow struct {
  313. term []byte
  314. doc []byte
  315. freq uint64
  316. vectors []*TermVector
  317. norm float32
  318. field uint16
  319. }
  320. func (tfr *TermFrequencyRow) Size() int {
  321. sizeInBytes := reflectStaticSizeTermFrequencyRow +
  322. len(tfr.term) +
  323. len(tfr.doc)
  324. for _, entry := range tfr.vectors {
  325. sizeInBytes += entry.Size()
  326. }
  327. return sizeInBytes
  328. }
  329. func (tfr *TermFrequencyRow) Term() []byte {
  330. return tfr.term
  331. }
  332. func (tfr *TermFrequencyRow) Freq() uint64 {
  333. return tfr.freq
  334. }
  335. func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
  336. buf := make([]byte, 3)
  337. buf[0] = 't'
  338. binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
  339. return buf
  340. }
  341. func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
  342. buf := make([]byte, 3+len(tfr.term))
  343. buf[0] = 't'
  344. binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
  345. copy(buf[3:], tfr.term)
  346. return buf
  347. }
  348. func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
  349. buf := make([]byte, 3+len(tfr.term)+1)
  350. buf[0] = 't'
  351. binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
  352. termLen := copy(buf[3:], tfr.term)
  353. buf[3+termLen] = ByteSeparator
  354. return buf
  355. }
  356. func (tfr *TermFrequencyRow) Key() []byte {
  357. buf := make([]byte, tfr.KeySize())
  358. size, _ := tfr.KeyTo(buf)
  359. return buf[:size]
  360. }
  361. func (tfr *TermFrequencyRow) KeySize() int {
  362. return termFrequencyRowKeySize(tfr.term, tfr.doc)
  363. }
  364. func termFrequencyRowKeySize(term, doc []byte) int {
  365. return 3 + len(term) + 1 + len(doc)
  366. }
  367. func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
  368. return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
  369. }
  370. func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
  371. buf[0] = 't'
  372. binary.LittleEndian.PutUint16(buf[1:3], field)
  373. termLen := copy(buf[3:], term)
  374. buf[3+termLen] = ByteSeparator
  375. docLen := copy(buf[3+termLen+1:], doc)
  376. return 3 + termLen + 1 + docLen
  377. }
  378. func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
  379. keySize := tfr.KeySize()
  380. if cap(buf) < keySize {
  381. buf = make([]byte, keySize)
  382. }
  383. actualSize, err := tfr.KeyTo(buf[0:keySize])
  384. return buf[0:actualSize], err
  385. }
  386. func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
  387. dr := NewDictionaryRow(tfr.term, tfr.field, 0)
  388. return dr.Key()
  389. }
  390. func (tfr *TermFrequencyRow) DictionaryRowKeySize() int {
  391. dr := NewDictionaryRow(tfr.term, tfr.field, 0)
  392. return dr.KeySize()
  393. }
  394. func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) {
  395. dr := NewDictionaryRow(tfr.term, tfr.field, 0)
  396. return dr.KeyTo(buf)
  397. }
  398. func (tfr *TermFrequencyRow) Value() []byte {
  399. buf := make([]byte, tfr.ValueSize())
  400. size, _ := tfr.ValueTo(buf)
  401. return buf[:size]
  402. }
  403. func (tfr *TermFrequencyRow) ValueSize() int {
  404. bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64
  405. for _, vector := range tfr.vectors {
  406. bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64
  407. }
  408. return bufLen
  409. }
  410. func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
  411. used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)
  412. normuint32 := math.Float32bits(tfr.norm)
  413. newbuf := buf[used : used+binary.MaxVarintLen64]
  414. used += binary.PutUvarint(newbuf, uint64(normuint32))
  415. for _, vector := range tfr.vectors {
  416. used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field))
  417. used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos)
  418. used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start)
  419. used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end)
  420. used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions)))
  421. for _, arrayPosition := range vector.arrayPositions {
  422. used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition)
  423. }
  424. }
  425. return used, nil
  426. }
  427. func (tfr *TermFrequencyRow) String() string {
  428. return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
  429. }
  430. func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
  431. tfr.term = term
  432. tfr.field = field
  433. tfr.doc = docID
  434. tfr.freq = freq
  435. tfr.norm = norm
  436. return tfr
  437. }
  438. func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
  439. return &TermFrequencyRow{
  440. term: term,
  441. field: field,
  442. doc: docID,
  443. freq: freq,
  444. norm: norm,
  445. }
  446. }
  447. func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
  448. return &TermFrequencyRow{
  449. term: term,
  450. field: field,
  451. doc: docID,
  452. freq: freq,
  453. norm: norm,
  454. vectors: vectors,
  455. }
  456. }
  457. func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) {
  458. rv := &TermFrequencyRow{}
  459. err := rv.parseK(key)
  460. if err != nil {
  461. return nil, err
  462. }
  463. return rv, nil
  464. }
  465. func (tfr *TermFrequencyRow) parseK(key []byte) error {
  466. keyLen := len(key)
  467. if keyLen < 3 {
  468. return fmt.Errorf("invalid term frequency key, no valid field")
  469. }
  470. tfr.field = binary.LittleEndian.Uint16(key[1:3])
  471. termEndPos := bytes.IndexByte(key[3:], ByteSeparator)
  472. if termEndPos < 0 {
  473. return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
  474. }
  475. tfr.term = key[3 : 3+termEndPos]
  476. docLen := keyLen - (3 + termEndPos + 1)
  477. if docLen < 1 {
  478. return fmt.Errorf("invalid term frequency key, empty docid")
  479. }
  480. tfr.doc = key[3+termEndPos+1:]
  481. return nil
  482. }
  483. func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
  484. tfr.doc = key[3+len(term)+1:]
  485. if len(tfr.doc) == 0 {
  486. return fmt.Errorf("invalid term frequency key, empty docid")
  487. }
  488. return nil
  489. }
  490. func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error {
  491. var bytesRead int
  492. tfr.freq, bytesRead = binary.Uvarint(value)
  493. if bytesRead <= 0 {
  494. return fmt.Errorf("invalid term frequency value, invalid frequency")
  495. }
  496. currOffset := bytesRead
  497. var norm uint64
  498. norm, bytesRead = binary.Uvarint(value[currOffset:])
  499. if bytesRead <= 0 {
  500. return fmt.Errorf("invalid term frequency value, no norm")
  501. }
  502. currOffset += bytesRead
  503. tfr.norm = math.Float32frombits(uint32(norm))
  504. tfr.vectors = nil
  505. if !includeTermVectors {
  506. return nil
  507. }
  508. var field uint64
  509. field, bytesRead = binary.Uvarint(value[currOffset:])
  510. for bytesRead > 0 {
  511. currOffset += bytesRead
  512. tv := TermVector{}
  513. tv.field = uint16(field)
  514. // at this point we expect at least one term vector
  515. if tfr.vectors == nil {
  516. tfr.vectors = make([]*TermVector, 0)
  517. }
  518. tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
  519. if bytesRead <= 0 {
  520. return fmt.Errorf("invalid term frequency value, vector contains no position")
  521. }
  522. currOffset += bytesRead
  523. tv.start, bytesRead = binary.Uvarint(value[currOffset:])
  524. if bytesRead <= 0 {
  525. return fmt.Errorf("invalid term frequency value, vector contains no start")
  526. }
  527. currOffset += bytesRead
  528. tv.end, bytesRead = binary.Uvarint(value[currOffset:])
  529. if bytesRead <= 0 {
  530. return fmt.Errorf("invalid term frequency value, vector contains no end")
  531. }
  532. currOffset += bytesRead
  533. var arrayPositionsLen uint64 = 0
  534. arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
  535. if bytesRead <= 0 {
  536. return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
  537. }
  538. currOffset += bytesRead
  539. if arrayPositionsLen > 0 {
  540. tv.arrayPositions = make([]uint64, arrayPositionsLen)
  541. for i := 0; uint64(i) < arrayPositionsLen; i++ {
  542. tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
  543. if bytesRead <= 0 {
  544. return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
  545. }
  546. currOffset += bytesRead
  547. }
  548. }
  549. tfr.vectors = append(tfr.vectors, &tv)
  550. // try to read next record (may not exist)
  551. field, bytesRead = binary.Uvarint(value[currOffset:])
  552. }
  553. if len(value[currOffset:]) > 0 && bytesRead <= 0 {
  554. return fmt.Errorf("invalid term frequency value, vector field invalid")
  555. }
  556. return nil
  557. }
  558. func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
  559. rv, err := NewTermFrequencyRowK(key)
  560. if err != nil {
  561. return nil, err
  562. }
  563. err = rv.parseV(value, true)
  564. if err != nil {
  565. return nil, err
  566. }
  567. return rv, nil
  568. }
  569. type BackIndexRow struct {
  570. doc []byte
  571. termsEntries []*BackIndexTermsEntry
  572. storedEntries []*BackIndexStoreEntry
  573. }
  574. func (br *BackIndexRow) AllTermKeys() [][]byte {
  575. if br == nil {
  576. return nil
  577. }
  578. rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely
  579. for _, termsEntry := range br.termsEntries {
  580. for i := range termsEntry.Terms {
  581. termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0)
  582. rv = append(rv, termRow.Key())
  583. }
  584. }
  585. return rv
  586. }
  587. func (br *BackIndexRow) AllStoredKeys() [][]byte {
  588. if br == nil {
  589. return nil
  590. }
  591. rv := make([][]byte, len(br.storedEntries))
  592. for i, storedEntry := range br.storedEntries {
  593. storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
  594. rv[i] = storedRow.Key()
  595. }
  596. return rv
  597. }
  598. func (br *BackIndexRow) Key() []byte {
  599. buf := make([]byte, br.KeySize())
  600. size, _ := br.KeyTo(buf)
  601. return buf[:size]
  602. }
  603. func (br *BackIndexRow) KeySize() int {
  604. return len(br.doc) + 1
  605. }
  606. func (br *BackIndexRow) KeyTo(buf []byte) (int, error) {
  607. buf[0] = 'b'
  608. used := copy(buf[1:], br.doc)
  609. return used + 1, nil
  610. }
  611. func (br *BackIndexRow) Value() []byte {
  612. buf := make([]byte, br.ValueSize())
  613. size, _ := br.ValueTo(buf)
  614. return buf[:size]
  615. }
  616. func (br *BackIndexRow) ValueSize() int {
  617. birv := &BackIndexRowValue{
  618. TermsEntries: br.termsEntries,
  619. StoredEntries: br.storedEntries,
  620. }
  621. return birv.Size()
  622. }
  623. func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
  624. birv := &BackIndexRowValue{
  625. TermsEntries: br.termsEntries,
  626. StoredEntries: br.storedEntries,
  627. }
  628. return birv.MarshalTo(buf)
  629. }
  630. func (br *BackIndexRow) String() string {
  631. return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries)
  632. }
  633. func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
  634. return &BackIndexRow{
  635. doc: docID,
  636. termsEntries: entries,
  637. storedEntries: storedFields,
  638. }
  639. }
  640. func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
  641. rv := BackIndexRow{}
  642. buf := bytes.NewBuffer(key)
  643. _, err := buf.ReadByte() // type
  644. if err != nil {
  645. return nil, err
  646. }
  647. rv.doc, err = buf.ReadBytes(ByteSeparator)
  648. if err == io.EOF && len(rv.doc) < 1 {
  649. err = fmt.Errorf("invalid doc length 0 - % x", key)
  650. }
  651. if err != nil && err != io.EOF {
  652. return nil, err
  653. } else if err == nil {
  654. rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
  655. }
  656. var birv BackIndexRowValue
  657. err = proto.Unmarshal(value, &birv)
  658. if err != nil {
  659. return nil, err
  660. }
  661. rv.termsEntries = birv.TermsEntries
  662. rv.storedEntries = birv.StoredEntries
  663. return &rv, nil
  664. }
  665. // STORED
  666. type StoredRow struct {
  667. doc []byte
  668. field uint16
  669. arrayPositions []uint64
  670. typ byte
  671. value []byte
  672. }
  673. func (s *StoredRow) Key() []byte {
  674. buf := make([]byte, s.KeySize())
  675. size, _ := s.KeyTo(buf)
  676. return buf[0:size]
  677. }
  678. func (s *StoredRow) KeySize() int {
  679. return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
  680. }
  681. func (s *StoredRow) KeyTo(buf []byte) (int, error) {
  682. docLen := len(s.doc)
  683. buf[0] = 's'
  684. copy(buf[1:], s.doc)
  685. buf[1+docLen] = ByteSeparator
  686. binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
  687. bytesUsed := 1 + docLen + 1 + 2
  688. for _, arrayPosition := range s.arrayPositions {
  689. varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
  690. bytesUsed += varbytes
  691. }
  692. return bytesUsed, nil
  693. }
  694. func (s *StoredRow) Value() []byte {
  695. buf := make([]byte, s.ValueSize())
  696. size, _ := s.ValueTo(buf)
  697. return buf[:size]
  698. }
  699. func (s *StoredRow) ValueSize() int {
  700. return len(s.value) + 1
  701. }
  702. func (s *StoredRow) ValueTo(buf []byte) (int, error) {
  703. buf[0] = s.typ
  704. used := copy(buf[1:], s.value)
  705. return used + 1, nil
  706. }
  707. func (s *StoredRow) String() string {
  708. return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
  709. }
  710. func (s *StoredRow) ScanPrefixForDoc() []byte {
  711. docLen := len(s.doc)
  712. buf := make([]byte, 1+docLen+1)
  713. buf[0] = 's'
  714. copy(buf[1:], s.doc)
  715. buf[1+docLen] = ByteSeparator
  716. return buf
  717. }
  718. func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
  719. return &StoredRow{
  720. doc: docID,
  721. field: field,
  722. arrayPositions: arrayPositions,
  723. typ: typ,
  724. value: value,
  725. }
  726. }
  727. func NewStoredRowK(key []byte) (*StoredRow, error) {
  728. rv := StoredRow{}
  729. buf := bytes.NewBuffer(key)
  730. _, err := buf.ReadByte() // type
  731. if err != nil {
  732. return nil, err
  733. }
  734. rv.doc, err = buf.ReadBytes(ByteSeparator)
  735. if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
  736. err = fmt.Errorf("invalid doc length 0")
  737. return nil, err
  738. }
  739. rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
  740. err = binary.Read(buf, binary.LittleEndian, &rv.field)
  741. if err != nil {
  742. return nil, err
  743. }
  744. rv.arrayPositions = make([]uint64, 0)
  745. nextArrayPos, err := binary.ReadUvarint(buf)
  746. for err == nil {
  747. rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
  748. nextArrayPos, err = binary.ReadUvarint(buf)
  749. }
  750. return &rv, nil
  751. }
  752. func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
  753. rv, err := NewStoredRowK(key)
  754. if err != nil {
  755. return nil, err
  756. }
  757. rv.typ = value[0]
  758. rv.value = value[1:]
  759. return rv, nil
  760. }
  761. type backIndexFieldTermVisitor func(field uint32, term []byte)
  762. // visitBackIndexRow is designed to process a protobuf encoded
  763. // value, without creating unnecessary garbage. Instead values are passed
  764. // to a callback, inspected first, and only copied if necessary.
  765. // Due to the fact that this borrows from generated code, it must be marnually
  766. // updated if the protobuf definition changes.
  767. //
  768. // This code originates from:
  769. // func (m *BackIndexRowValue) Unmarshal(data []byte) error
  770. // the sections which create garbage or parse unintersting sections
  771. // have been commented out. This was done by design to allow for easier
  772. // merging in the future if that original function is regenerated
  773. func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
  774. l := len(data)
  775. iNdEx := 0
  776. for iNdEx < l {
  777. var wire uint64
  778. for shift := uint(0); ; shift += 7 {
  779. if iNdEx >= l {
  780. return io.ErrUnexpectedEOF
  781. }
  782. b := data[iNdEx]
  783. iNdEx++
  784. wire |= (uint64(b) & 0x7F) << shift
  785. if b < 0x80 {
  786. break
  787. }
  788. }
  789. fieldNum := int32(wire >> 3)
  790. wireType := int(wire & 0x7)
  791. switch fieldNum {
  792. case 1:
  793. if wireType != 2 {
  794. return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
  795. }
  796. var msglen int
  797. for shift := uint(0); ; shift += 7 {
  798. if iNdEx >= l {
  799. return io.ErrUnexpectedEOF
  800. }
  801. b := data[iNdEx]
  802. iNdEx++
  803. msglen |= (int(b) & 0x7F) << shift
  804. if b < 0x80 {
  805. break
  806. }
  807. }
  808. postIndex := iNdEx + msglen
  809. if msglen < 0 {
  810. return ErrInvalidLengthUpsidedown
  811. }
  812. if postIndex > l {
  813. return io.ErrUnexpectedEOF
  814. }
  815. // dont parse term entries
  816. // m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
  817. // if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
  818. // return err
  819. // }
  820. // instead, inspect them
  821. if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
  822. return err
  823. }
  824. iNdEx = postIndex
  825. case 2:
  826. if wireType != 2 {
  827. return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
  828. }
  829. var msglen int
  830. for shift := uint(0); ; shift += 7 {
  831. if iNdEx >= l {
  832. return io.ErrUnexpectedEOF
  833. }
  834. b := data[iNdEx]
  835. iNdEx++
  836. msglen |= (int(b) & 0x7F) << shift
  837. if b < 0x80 {
  838. break
  839. }
  840. }
  841. postIndex := iNdEx + msglen
  842. if msglen < 0 {
  843. return ErrInvalidLengthUpsidedown
  844. }
  845. if postIndex > l {
  846. return io.ErrUnexpectedEOF
  847. }
  848. // don't parse stored entries
  849. // m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
  850. // if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
  851. // return err
  852. // }
  853. iNdEx = postIndex
  854. default:
  855. var sizeOfWire int
  856. for {
  857. sizeOfWire++
  858. wire >>= 7
  859. if wire == 0 {
  860. break
  861. }
  862. }
  863. iNdEx -= sizeOfWire
  864. skippy, err := skipUpsidedown(data[iNdEx:])
  865. if err != nil {
  866. return err
  867. }
  868. if skippy < 0 {
  869. return ErrInvalidLengthUpsidedown
  870. }
  871. if (iNdEx + skippy) > l {
  872. return io.ErrUnexpectedEOF
  873. }
  874. // don't track unrecognized data
  875. //m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
  876. iNdEx += skippy
  877. }
  878. }
  879. return nil
  880. }
  881. // visitBackIndexRowFieldTerms is designed to process a protobuf encoded
  882. // sub-value within the BackIndexRowValue, without creating unnecessary garbage.
  883. // Instead values are passed to a callback, inspected first, and only copied if
  884. // necessary. Due to the fact that this borrows from generated code, it must
  885. // be marnually updated if the protobuf definition changes.
  886. //
  887. // This code originates from:
  888. // func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
  889. // the sections which create garbage or parse uninteresting sections
  890. // have been commented out. This was done by design to allow for easier
  891. // merging in the future if that original function is regenerated
  892. func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
  893. var theField uint32
  894. var hasFields [1]uint64
  895. l := len(data)
  896. iNdEx := 0
  897. for iNdEx < l {
  898. var wire uint64
  899. for shift := uint(0); ; shift += 7 {
  900. if iNdEx >= l {
  901. return io.ErrUnexpectedEOF
  902. }
  903. b := data[iNdEx]
  904. iNdEx++
  905. wire |= (uint64(b) & 0x7F) << shift
  906. if b < 0x80 {
  907. break
  908. }
  909. }
  910. fieldNum := int32(wire >> 3)
  911. wireType := int(wire & 0x7)
  912. switch fieldNum {
  913. case 1:
  914. if wireType != 0 {
  915. return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
  916. }
  917. var v uint32
  918. for shift := uint(0); ; shift += 7 {
  919. if iNdEx >= l {
  920. return io.ErrUnexpectedEOF
  921. }
  922. b := data[iNdEx]
  923. iNdEx++
  924. v |= (uint32(b) & 0x7F) << shift
  925. if b < 0x80 {
  926. break
  927. }
  928. }
  929. // m.Field = &v
  930. theField = v
  931. hasFields[0] |= uint64(0x00000001)
  932. case 2:
  933. if wireType != 2 {
  934. return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
  935. }
  936. var stringLen uint64
  937. for shift := uint(0); ; shift += 7 {
  938. if iNdEx >= l {
  939. return io.ErrUnexpectedEOF
  940. }
  941. b := data[iNdEx]
  942. iNdEx++
  943. stringLen |= (uint64(b) & 0x7F) << shift
  944. if b < 0x80 {
  945. break
  946. }
  947. }
  948. postIndex := iNdEx + int(stringLen)
  949. if postIndex > l {
  950. return io.ErrUnexpectedEOF
  951. }
  952. //m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
  953. callback(theField, data[iNdEx:postIndex])
  954. iNdEx = postIndex
  955. default:
  956. var sizeOfWire int
  957. for {
  958. sizeOfWire++
  959. wire >>= 7
  960. if wire == 0 {
  961. break
  962. }
  963. }
  964. iNdEx -= sizeOfWire
  965. skippy, err := skipUpsidedown(data[iNdEx:])
  966. if err != nil {
  967. return err
  968. }
  969. if skippy < 0 {
  970. return ErrInvalidLengthUpsidedown
  971. }
  972. if (iNdEx + skippy) > l {
  973. return io.ErrUnexpectedEOF
  974. }
  975. //m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
  976. iNdEx += skippy
  977. }
  978. }
  979. // if hasFields[0]&uint64(0x00000001) == 0 {
  980. // return new(github_com_golang_protobuf_proto.RequiredNotSetError)
  981. // }
  982. return nil
  983. }
上海开阖软件有限公司 沪ICP备12045867号-1